1. mismatch and modelling

A. PACKAGES

library(rpostgis)
library(plyr)
library(RPostgreSQL)
library(adehabitatHR)
library(raster)
library(sf)
library(sp)
library(RgoogleMaps) 
library(png)
library(ggplot2)
library(gridExtra)
library(grid)
library(mgcv)
library(glmmTMB)
library(RCurl)
library(lme4)
library(nlme)
library(MuMIn)
library(piecewiseSEM)
library(Hmisc)

B. FUNCTIONS

'%!in%' <- function(x,y)!('%in%'(x,y)) # create 'not in' operator
lapply(list.files(pattern= '^1.*', path='./fncs/',full.names=TRUE), source)
source(list.files(pattern= '^00.*', path='./fncs/',full.names=TRUE))

C. DATA PREPARATION

C1. DATA IMPORT

point_data <- read.csv(paste0('../data/1_mismatch_and_modelling/gps_data.csv'), stringsAsFactors=FALSE)
kernel_data <- read.csv(paste0('../data/1_mismatch_and_modelling/kde_data.csv'), stringsAsFactors=FALSE)

C1.1 gps_data

Data set with absolute number and proportion of locations classified as forest/open habitat by TCD and CLC for every individual’s day and night locations

species: roe or red deer
population: unique identifier study area as defined in the eurodeer database
study_areas_id: unique identifier study area as defined in the eurodeer database
aniyr: unique identifier for animal-year
animals_id: unique identifier of animal as defined in the eurodeer database
sex: biological sex of the animal
time: day or night time
total: total number of locations per day/night and aniyr
clc0tcd0: number of locations in open habitat both for CLC and TCD per day/night and aniyr
clc1tcd1: number of locations in forest habitat both for CLC and TCD per day/night and aniyr
clc0tcd1: number of locations in open habitat for CLC and in forest habitat for TCD per day/night and aniyr
clc1tcd0: number of locations in forest habitat for CLC and in open habitat for TCD per day/night and aniyr
tcd_open_abs: absolute number of locations in open habitat according to TCD per day/night and aniyr
tcd_forest_abs: absolute number of locations in forest habitat according to TCD per day/night and aniyr
clc_open_abs: absolute number of locations in open habitat according to CLC per day/night and aniyr
clc_forest_abs: absolute number of locations in forest habitat according to CLC per day/night and aniyr
clc0tcd0_prop: proportion of locations in open habitat both for CLC and TCD per day/night and aniyr
clc1tcd1_prop: proportion of locations in forest habitat both for CLC and TCD per day/night and aniyr
clc0tcd1_prop: proportion of locations in open habitat for CLC and in forest habitat for TCD per day/night and aniyr
clc1tcd0_prop: proportion of locations in forest habitat for CLC and in open habitat for TCD per day/night and aniyr
tcd_open_prop: proportion of locations in open habitat according to TCD per day/night and aniyr
tcd_forest_prop: proportion of locations in forest habitat according to TCD per day/night and aniyr
clc_open_prop: proportion of locations in open habitat according to CLC per day/night and aniyr
clc_forest_prop: proportion of locations in forest habitat according to CLC per day/night and aniyr

C1.2 kde_data

Data set with absolute number and proportion of cells within the KDE classified as forest/open habitat by TCD and CLC for every individual’s day and night home range

species: roe or red deer
population: unique identifier study area as defined in the eurodeer database
study_areas_id: unique identifier study area as defined in the eurodeer database
aniyr: unique identifier for animal-year
animals_id: unique identifier of animal as defined in the eurodeer database
area: surface area
time: day or night time
total: total number of cells within the day/night home range of the aniyr
clc0tcd0: number of cells in open habitat both for CLC and TCD per day/night and aniyr
clc1tcd1: number of cells in forest habitat both for CLC and TCD per day/night and aniyr
clc0tcd1: number of cells in open habitat for CLC and in forest habitat for TCD per day/night and aniyr
clc1tcd0: number of cells in forest habitat for CLC and in open habitat for TCD per day/night and aniyr
open_tcd: absolute number of cells of locations in open habitat according to TCD per day/night and aniyr
forest_tcd: absolute number of cells of locations in forest habitat according to TCD per day/night and aniyr
open_clc: absolute number of cells of locations in open habitat according to CLC per day/night and aniyr
forest_clc: absolute number of cells of locations in forest habitat according to CLC per day/night and aniyr
clc0tcd0_prop: proportion of cells in open habitat both for CLC and TCD per day/night and aniyr
clc1tcd1_prop: proportion of cells in forest habitat both for CLC and TCD per day/night and aniyr
clc0tcd1_prop: proportion of cells in open habitat for CLC and in forest habitat for TCD per day/night and aniyr
clc1tcd0_prop: proportion of cells in forest habitat for CLC and in open habitat for TCD per day/night and aniyr
open_tcd_prop: proportion of cells in open habitat according to TCD per day/night and aniyr
forest_tcd_prop: proportion of cells in forest habitat according to TCD per day/night and aniyr
open_clc_prop: proportion of cells in open habitat according to CLC per day/night and aniyr
forest_clc_prop: proportion of cells in forest habitat according to CLC per day/night and aniyr

C2. DATA PREPARATION

# add aniyr species column 
point_data$aniyr_species <- paste0(point_data$aniyr,'_',point_data$species)
kernel_data$aniyr_species <- paste0(kernel_data$aniyr,'_',kernel_data$species)

# rename columns so that kernel and point column names are identical
colnames(kernel_data)[10:ncol(kernel_data)] <- colnames(point_data)[10:ncol(point_data)]

aniyr_species <- kernel_data$aniyr_species
kernel_data <- cbind(aniyr_species,kernel_data[,2:(ncol(kernel_data)-1)])

aniyr_species <- point_data$aniyr_species
point_data <- cbind(aniyr_species,point_data[,2:(ncol(point_data)-1)])

# add column for year 
point_data$yearx <- gsub('[0-9]*._','', point_data$aniyr)
kernel_data$yearx <- gsub('[0-9]*._','', kernel_data$aniyr)

# Check which animals have only one observation (day or night)
remove <-unlist(as.vector(subset(plyr::count(kernel_data$aniyr),freq==1)[,'x']))
kernel_data <- subset(kernel_data, aniyr %!in% remove)
point_data <- point_data[which(point_data$aniyr_species %in% unique(kernel_data$aniyr_species)),] # remove animals that were not included in the kernel calculations

# split in order to join both day and night 

# ROE DEER 
# KERNEL
roe_kernel <- subset(kernel_data, species == 'roe') # subset roe
roed_kernel <-roe_kernel[which(roe_kernel$time == 'day'),] # subset roe during daytime
roen_kernel <- roe_kernel[which(roe_kernel$time == 'night'),] # subset roe during nightime
colnames(roed_kernel)[7:length(colnames(roed_kernel))] <- paste0(colnames(roed_kernel)[7:length(colnames(roed_kernel))],'_d') # add _d to the column names of daylight dataset
colnames(roen_kernel)[7:length(colnames(roen_kernel))] <- paste0(colnames(roen_kernel)[7:length(colnames(roen_kernel))],'_n') # add _n to the column names of daylight dataset
roej_kernel <- plyr::join(roed_kernel,roen_kernel, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id')) # if a record for day or night is missing for a deer it is excluded - hence INNER join

# POINTS
roe_points <- subset(point_data, species == 'roe') # subset roe
roed_points <-roe_points[which(roe_points$time == 'day'),] # subset roe during daytime
roen_points <- roe_points[which(roe_points$time == 'night'),] # subset roe during nightime
colnames(roed_points)[8:length(colnames(roed_points))] <- paste0(colnames(roed_points)[8:length(colnames(roed_points))],'_d') # add _d to the column names of daylight dataset
colnames(roen_points)[8:length(colnames(roen_points))] <- paste0(colnames(roen_points)[8:length(colnames(roen_points))],'_n') # add _n to the column names of daylight dataset
roej_points <- plyr::join(roed_points,roen_points, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id','sex')) # if a record for day or night is missing for a deer it is excluded - hence INNER join

# RED DEER 
# KERNEL
red_kernel <- subset(kernel_data, species == 'red') # subset red
redd_kernel <-red_kernel[which(red_kernel$time == 'day'),] # subset roe during daytime
redn_kernel <- red_kernel[which(red_kernel$time == 'night'),] # subset roe during nightime
colnames(redd_kernel)[7:length(colnames(redd_kernel))] <- paste0(colnames(redd_kernel)[7:length(colnames(redd_kernel))],'_d') # add _d to the column names of daylight dataset
colnames(redn_kernel)[7:length(colnames(redn_kernel))] <- paste0(colnames(redn_kernel)[7:length(colnames(redn_kernel))],'_n') # add _n to the column names of daylight dataset
redj_kernel <- plyr::join(redd_kernel,redn_kernel, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id')) # if a record for day or night is missing for a deer it is excluded - hence INNER join

# POINTS
red_points <- subset(point_data, species == 'red') # subset red
redd_points <-red_points[which(red_points$time == 'day'),] # subset roe during daytime
redn_points <- red_points[which(red_points$time == 'night'),] # subset roe during nightime
colnames(redd_points)[8:length(colnames(redd_points))] <- paste0(colnames(redd_points)[8:length(colnames(redd_points))],'_d') # add _d to the column names of daylight dataset
colnames(redn_points)[8:length(colnames(redn_points))] <- paste0(colnames(redn_points)[8:length(colnames(redn_points))],'_n') # add _n to the column names of daylight dataset
redj_points <- plyr::join(redd_points,redn_points, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id','sex')) # if a record for day or night is missing for a deer it is excluded - hence INNER join

# fixes per species 
roe_fixes <- sum(roej_points$total_n + roej_points$total_d)
red_fixes <- sum(redj_points$total_n + redj_points$total_d)
# fixes in total 
sum(roe_fixes,red_fixes)

## [1] 160261

# number of animals per population

# ROE DEER 
(nroe_animals_pop <- plyr::count(unique(roe_points[,c('animals_id','population')])$population))

##    x freq
## 1  1   17
## 2  2   42
## 3  8  149
## 4 15   24
## 5 25   40

(nroe_aniyrs_pop <- plyr::count(unique(roe_points[,c('aniyr','population')])[,'population']))

##    x freq
## 1  1   19
## 2  2   49
## 3  8  165
## 4 15   30
## 5 25   66

(freqroe_ani_aniyr <- plyr::join(nroe_animals_pop, nroe_aniyrs_pop, type='inner', by='x'))

##    x freq freq
## 1  1   17   19
## 2  2   42   49
## 3  8  149  165
## 4 15   24   30
## 5 25   40   66

colnames(freqroe_ani_aniyr) <- c('population','n_animals','n_aniyrs')

# RED DEER
(nred_animals_pop <- plyr::count(unique(red_points[,c('animals_id','population')])$population))

##    x freq
## 1  3    8
## 2  8   60
## 3 14   17
## 4 17   18
## 5 21   14

(nred_aniyrs_pop <- plyr::count(unique(red_points[,c('aniyr','population')])[,'population']))

##    x freq
## 1  3   10
## 2  8   69
## 3 14   29
## 4 17   26
## 5 21   23

(freqred_ani_aniyr <- plyr::join(nred_animals_pop, nred_aniyrs_pop, type='inner', by='x'))

##    x freq freq
## 1  3    8   10
## 2  8   60   69
## 3 14   17   29
## 4 17   18   26
## 5 21   14   23

colnames(freqred_ani_aniyr) <- c('population','n_animals','n_aniyrs')

# animals per population per year 
(roe_per_year <- (plyr::count(roe_points[,c('population','yearx')])))

##    population yearx freq
## 1           1  2005   16
## 2           1  2006   14
## 3           1  2007    8
## 4           2  2006   14
## 5           2  2007    8
## 6           2  2008    2
## 7           2  2010   42
## 8           2  2011   28
## 9           2  2012    2
## 10          2  2013    2
## 11          8  2003   14
## 12          8  2004    4
## 13          8  2005   26
## 14          8  2006   34
## 15          8  2007   34
## 16          8  2008   36
## 17          8  2009   32
## 18          8  2010   32
## 19          8  2011   52
## 20          8  2012   28
## 21          8  2013   38
## 22         15  2010    8
## 23         15  2011   16
## 24         15  2012   24
## 25         15  2013   12
## 26         25  2012   26
## 27         25  2013   72
## 28         25  2014   34

(red_per_year <- (plyr::count(red_points[,c('population','yearx')])))

##    population yearx freq
## 1           3  2005    2
## 2           3  2006    4
## 3           3  2008    2
## 4           3  2010    4
## 5           3  2011    8
## 6           8  2002    4
## 7           8  2003    6
## 8           8  2004    4
## 9           8  2005    4
## 10          8  2006    6
## 11          8  2007    8
## 12          8  2008   28
## 13          8  2009   16
## 14          8  2010    4
## 15          8  2011   26
## 16          8  2012    4
## 17          8  2013   28
## 18         14  2008    2
## 19         14  2009   10
## 20         14  2010   28
## 21         14  2011   12
## 22         14  2012    2
## 23         14  2013    4
## 24         17  2002    6
## 25         17  2004    2
## 26         17  2005    2
## 27         17  2009   12
## 28         17  2010   16
## 29         17  2012    2
## 30         17  2013    6
## 31         17  2014    6
## 32         21  2001    4
## 33         21  2003    2
## 34         21  2004    2
## 35         21  2005    6
## 36         21  2006    2
## 37         21  2009    4
## 38         21  2010   14
## 39         21  2011    6
## 40         21  2013    2
## 41         21  2014    4

# only keep the most prevalent year in some populations to minimize pseudo-replication 

# ROE DEER
france_roe <- unique(subset(roe_points, population == 8 & yearx == 2011)[,'animals_id'])
bavaria_roe <- unique(subset(roe_points, population == 2 & yearx == 2010)[,'animals_id'])
swiss_roe <- unique(subset(roe_points, population == 25 & yearx == 2014)[,'animals_id'])
germany_roe <- unique(subset(roe_points, population == 15)[,'animals_id'])
italy_roe <- unique(subset(roe_points, population == 1)[,'animals_id'])

subset_roe <- subset(roe_points, animals_id %in% c(france_roe,bavaria_roe,swiss_roe, germany_roe, italy_roe))
aniyrs_subset_roe <- plyr::count(unique(subset_roe[,c('animals_id','yearx','population')])[,'population'])
animals_subset_roe <- plyr::count(unique(subset_roe[,c('animals_id','population')])[,'population'])
subset_sum_roe <- plyr::join(aniyrs_subset_roe, animals_subset_roe, by ='x',type='left')
subset_sum_roe$diff <- subset_sum_roe[,2]-subset_sum_roe[,3]
subset_sum_roe

##    x freq freq diff
## 1  1   19   17    2
## 2  2   27   21    6
## 3  8   33   26    7
## 4 15   30   24    6
## 5 25   32   17   15

(nr_aniyrs_roe <- sum(subset_sum_roe[,2]))

## [1] 141

(nr_anis_roe <- sum(subset_sum_roe[,3]))

## [1] 105

# RED DEER
italy_red <- unique(subset(red_points, population == 3)[,'animals_id'])
bavaria_red <- unique(subset(red_points, population == 8 & yearx %in% c(2011,2012,2013))[,'animals_id'])
germany_red <- unique(subset(red_points, population == 14)[,'animals_id'])
belgium_a_red <- unique(subset(red_points, population == 17)[,'animals_id'])
belgium_b_red <- unique(subset(red_points, population == 21)[,'animals_id'])

subset_red <- subset(red_points, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
aniyrs_subset_red <- plyr::count(unique(subset_red[,c('animals_id','yearx','population')])[,'population'])
animals_subset_red <- plyr::count(unique(subset_red[,c('animals_id','population')])[,'population'])
subset_sum_red <- plyr::join(aniyrs_subset_red, animals_subset_red, by ='x',type='left')
subset_sum_red$diff <- subset_sum_red[,2]-subset_sum_red[,3]
subset_sum_red

##    x freq freq diff
## 1  3   10    8    2
## 2  8   29   28    1
## 3 14   29   17   12
## 4 17   26   18    8
## 5 21   23   14    9

(nr_aniyrs_red <- sum(subset_sum_red[,2]))

## [1] 117

(nr_anis_red <- sum(subset_sum_red[,3]))

## [1] 85

# filter animals that are kept for further analysis
roe_points <- subset(roe_points, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe)) 
red_points <- subset(red_points, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
roe_kernel <- subset(roe_kernel, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe)) 
red_kernel <- subset(red_kernel, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))

roej_points <- subset(roej_points, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe))
redj_points <- subset(redj_points, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
roej_kernel <- subset(roej_kernel, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe))
redj_kernel <- subset(redj_kernel, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))

# number of remaining gps points
(roenr_points <- sum(roe_points$tcd_open_abs) + sum(roe_points$tcd_forest_abs))

## [1] 45529

(rednr_points <- sum(red_points$tcd_open_abs) + sum(red_points$tcd_forest_abs))

## [1] 39522

# number of remaining roe and red deer per year per population
(roe_per_year <- (plyr::count(unique(roe_points[,c('animals_id','population','yearx')])[,c('population','yearx')])))

##    population yearx freq
## 1           1  2005    8
## 2           1  2006    7
## 3           1  2007    4
## 4           2  2006    1
## 5           2  2007    1
## 6           2  2010   21
## 7           2  2011    4
## 8           8  2006    1
## 9           8  2008    1
## 10          8  2009    2
## 11          8  2010    1
## 12          8  2011   26
## 13          8  2012    1
## 14          8  2013    1
## 15         15  2010    4
## 16         15  2011    8
## 17         15  2012   12
## 18         15  2013    6
## 19         25  2013   15
## 20         25  2014   17

(red_per_year <- (plyr::count(unique(red_points[,c('animals_id','population','yearx')])[,c('population','yearx')])))

##    population yearx freq
## 1           3  2005    1
## 2           3  2006    2
## 3           3  2008    1
## 4           3  2010    2
## 5           3  2011    4
## 6           8  2011   13
## 7           8  2012    2
## 8           8  2013   14
## 9          14  2008    1
## 10         14  2009    5
## 11         14  2010   14
## 12         14  2011    6
## 13         14  2012    1
## 14         14  2013    2
## 15         17  2002    3
## 16         17  2004    1
## 17         17  2005    1
## 18         17  2009    6
## 19         17  2010    8
## 20         17  2012    1
## 21         17  2013    3
## 22         17  2014    3
## 23         21  2001    2
## 24         21  2003    1
## 25         21  2004    1
## 26         21  2005    3
## 27         21  2006    1
## 28         21  2009    2
## 29         21  2010    7
## 30         21  2011    3
## 31         21  2013    1
## 32         21  2014    2

# bind roe and red deer datasets
POINT_DATA <- rbind(roe_points,red_points)
KERNEL_DATA <- rbind(roe_kernel,red_kernel)

C3. DATA EXPORT

# write.csv(POINT_DATA, '../data/1_mismatch_and_modelling/gps_for_model.csv')
# write.csv(KERNEL_DATA,'../data/1_mismatch_and_modelling/kde_for_model.csv')

D. MISMATCH

Classification mismatch between TCD and CLC in red and roe deer GPS locations

D1. Table 3a: confusion matrix values

# absolute 
(tot_roe <-colSums(roe_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")]))

## clc0tcd1 clc1tcd0 clc0tcd0 clc1tcd1 
##     4637     3657    24667    12568

(tot_red <-colSums(red_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")]))

## clc0tcd1 clc1tcd0 clc0tcd0 clc1tcd1 
##     5219     3740     9274    21289

# proportion
(prop_roe <-colSums(roe_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")])/sum(tot_roe))

##   clc0tcd1   clc1tcd0   clc0tcd0   clc1tcd1 
## 0.10184717 0.08032243 0.54178655 0.27604384

(prop_red <-colSums(red_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")])/sum(tot_red))

##   clc0tcd1   clc1tcd0   clc0tcd0   clc1tcd1 
## 0.13205303 0.09463084 0.23465412 0.53866201

## Total mismatch

# absolute 
(tot_roe_mismatch<-sum(tot_roe[1:2]))

## [1] 8294

(prop_roe_mismatch<-sum(prop_roe[1:2]))

## [1] 0.1821696

# proportion 
(tot_red_mismatch<-sum(tot_red[1:2]))

## [1] 8959

(prop_red_mismatch<-sum(prop_red[1:2]))

## [1] 0.2266839

## Proportion of forest and open according to clc and tcd 
(forest_roe_clc <- sum(prop_roe[c('clc1tcd0','clc1tcd1')]))*100

## [1] 35.63663

(forest_roe_tcd <- sum(prop_roe[c('clc0tcd1','clc1tcd1')]))*100

## [1] 37.7891

(forest_red_clc <- sum(prop_red[c('clc1tcd0','clc1tcd1')]))*100

## [1] 63.32928

(forest_red_tcd <- sum(prop_red[c('clc0tcd1','clc1tcd1')]))*100

## [1] 67.0715

D2. Boxplots (not in ms)

res2=300
dims=480/72*res2

# are there identical animal years for roe and red?
length(unique(c(roej_points$aniyr,redj_points$aniyr))) == length(c(roej_points$aniyr,redj_points$aniyr))

## [1] TRUE

length(unique(c(roej_points$aniyr,redj_points$aniyr)))

## [1] 258

length(c(roej_points$aniyr,redj_points$aniyr))

## [1] 258

aniyr <- c(roej_points$aniyr,redj_points$aniyr) # note: there are no common id.year in roe and red deer

#### D2.1 Figure: proportion of forest #### 

## Proportion of forest for roe and red deer during day and night by TCD and CLC ##
proportion_plot()

# export(func=proportion_plot(),file="../results/boxplots_proportion_of_forest_CLC_TCD_F",res=300, ratio=1.5, type='pdf')
# export(func=proportion_plot(),file="../results/boxplots_proportion_of_forest_CLC_TCD_F",res=300, ratio=1.5, type='png')
# export(func=proportion_plot(),file="../results/boxplots_proportion_of_forest_CLC_TCD_F",res=300, ratio=1.5, type='tif')

D2.2 Figure: mismatch between TCD and CLC

boxplot_mismatch()

# export(func=boxplot_mismatch(withbox=TRUE),file="../results/boxplots_mismatch_CLC_TCD_withcolbox",res=300, ratio=1.5, type='pdf')
# export(func=boxplot_mismatch(withbox=TRUE),file="../results/boxplots_mismatch_CLC_TCD_withcolbox",res=300, ratio=1.5, type='png')
# export(func=boxplot_mismatch(withbox=TRUE),file="../results/boxplots_mismatch_CLC_TCD_withcolbox",res=300, ratio=1.5, type='tif')

E. STATISTICAL ANALYSIS

model with forest proportion as response variable

E1. DATA IMPORT

#setwd('/Users/jedgroev/OneDrive - UvA/1_FEM_CRI/PAPERS/MARCO/')
point_data <- read.csv(paste0('../data/1_mismatch_and_modelling/gps_for_model.csv'), stringsAsFactors = FALSE)
kernel_data <- read.csv(paste0('../data/1_mismatch_and_modelling/kde_for_model.csv'), stringsAsFactors = FALSE)

E2. DATA PREPARATION FOR MODEL

# convert to factors 
point_data$animals_id_fact <- as.factor(point_data$animals_id)
point_data$population <- factor(paste0(point_data$species,'_',point_data$study_areas_id), levels = unique(sort(paste0(point_data$species,'_',point_data$study_areas_id),decreasing = TRUE)))
point_data$time <- factor(point_data$time)

kernel_data$animals_id_fact <- as.factor(kernel_data$animals_id)
kernel_data$population <-  factor(paste0(kernel_data$species,'_',kernel_data$study_areas_id), levels = unique(sort(paste0(kernel_data$species,'_',kernel_data$study_areas_id),decreasing = TRUE)))
kernel_data$time <- factor(kernel_data$time)

# data frames to define order of the population in the plot 

# RED DEER
(red_pops_or <- data.frame(ord= 6:10, 
                           population=c('red_3','red_14','red_8','red_21','red_17'), 
                           popname= c('N-Italy', 'N-Germany','SE-Germany', 'SE-Belgium', 'SW-Belgium'),
                           stringsAsFactors = FALSE))

##   ord population    popname
## 1   6      red_3    N-Italy
## 2   7     red_14  N-Germany
## 3   8      red_8 SE-Germany
## 4   9     red_21 SE-Belgium
## 5  10     red_17 SW-Belgium

# ROE DEER
(roe_pops_or <-data.frame(ord=1:5, 
                          population=c('roe_8','roe_15','roe_25','roe_1','roe_2'),
                          popname=c('SW-France', 'SW-Germany', 'Switzerland', 'N-Italy', 'SE-Germany'),
                          stringsAsFactors = FALSE))

##   ord population     popname
## 1   1      roe_8   SW-France
## 2   2     roe_15  SW-Germany
## 3   3     roe_25 Switzerland
## 4   4      roe_1     N-Italy
## 5   5      roe_2  SE-Germany

roered_pops_df <- rbind(roe_pops_or,red_pops_or)

point_data <- plyr::join(point_data, roered_pops_df, type='left', by='population')
kernel_data <- plyr::join(kernel_data, roered_pops_df, type='left', by='population')

E3. Exploratory plots (not in ms)

#### Figure: Proportion of forest according to tcd and clc in day and night ####
par(mfrow=c(2,1), mar=c(10,4,2,2))
prop <- aggregate(. ~ popname + time ,data=point_data[, c('popname','total','time','tcd_forest_abs','clc_forest_abs','tcd_open_abs','clc_open_abs')],sum)
prop$tcd_forest_abs <- prop$tcd_forest_abs/prop$total
prop$clc_forest_abs <- prop$clc_forest_abs/prop$total
prop$tcd_open_abs <- prop$tcd_open_abs/prop$total
prop$clc_open_abs <- prop$clc_open_abs/prop$total
prop <- plyr::join(prop,roered_pops_df[,c('ord','popname')], type='left', by='popname')
prop <- prop[order(prop$ord, prop$time),]
a <- t(as.matrix(prop[,c('tcd_forest_abs','tcd_open_abs')]))
b <- t(as.matrix(prop[,c('clc_forest_abs','clc_open_abs')]))
colnames(a) <- paste0(prop$popname, '_',prop$time)
colnames(b) <- paste0(prop$popname, '_',prop$time)
barplot(a, las=2,main='tcd', space=c(0,rep(c(0,0.5),10))[1:20])
barplot(b, las=2,main='clc', space=c(0,rep(c(0,0.5),10))[1:20])

#### Figure: confusion matrix as barplots (per population and day vs night) ####
par(mfrow=c(1,1), mar=c(10,4,2,0))
prop <- aggregate(. ~ popname + time ,
                  data=point_data[, c('popname','total','time','clc0tcd0','clc1tcd0','clc1tcd1','clc0tcd1')],sum)
prop$clc0tcd0 <- prop$clc0tcd0/prop$total
prop$clc1tcd0 <- prop$clc1tcd0/prop$total
prop$clc1tcd1 <- prop$clc1tcd1/prop$total
prop$clc0tcd1 <- prop$clc0tcd1/prop$total
prop <- plyr::join(prop,roered_pops_df[,c('ord','popname')], type='left', by='popname')
prop <- prop[order(prop$ord, prop$time),]
a <- t(as.matrix(prop[,c('clc1tcd1','clc0tcd1','clc0tcd0','clc1tcd0')]))
colnames(a) <- paste0(prop$popname, '_',prop$time)
barplot(a, 
        las=2,
        main='tcd', 
        space=c(0,rep(c(0,0.5),10))[1:20], 
        col=c('forestgreen','blue','darkolivegreen1','lightblue'))

#### Figure: Bar and boxplots of forest proportion ####
par(mfrow=c(1,1), mar=c(6,0,2,0))
forest_proportion_barboxplot()

# export(func=forest_proportion_barboxplot(),file="../results/forest_proportion_barboxplot",res=300, ratio=1.5, type='pdf')
# export(func=forest_proportion_barboxplot(),file="../results/forest_proportion_barboxplot",res=300, ratio=1.5, type='png')
# export(func=forest_proportion_barboxplot(),file="../results/forest_proportion_barboxplot",res=300, ratio=1.5, type='tif')

E4. MODELS

# make a subset for each species and method (KDE/TCD)
sub_roe_point <- subset(point_data,species=='roe')
sub_red_point <- subset(point_data,species=='red')
sub_roe_kernel <- subset(kernel_data,species=='roe')
sub_red_kernel <- subset(kernel_data,species=='red')

# remove unnecessary factor levels  
sub_roe_point[] <- lapply(sub_roe_point, function(x) if(is.factor(x)) factor(x) else x)
sub_red_point[] <- lapply(sub_red_point, function(x) if(is.factor(x)) factor(x) else x)
sub_roe_kernel[] <- lapply(sub_roe_kernel, function(x) if(is.factor(x)) factor(x) else x)
sub_red_kernel[] <- lapply(sub_red_kernel, function(x) if(is.factor(x)) factor(x) else x)


#### E4.1 models and model selection ####

mods <- model_selection()

models <- mods[[1]]
best <- mods[[2]]

#### E4.2 rsquares ####

(rsq_model <- lapply(models, function(x) rsquared(x)))

## [[1]]
##          Response   family     link method R.squared
## 1 tcd_forest_prop gaussian identity   none 0.5615685
## 
## [[2]]
##          Response   family     link method R.squared
## 1 clc_forest_prop gaussian identity   none 0.4454718
## 
## [[3]]
##          Response   family     link method R.squared
## 1 tcd_forest_prop gaussian identity   none 0.5674175
## 
## [[4]]
##          Response   family     link method R.squared
## 1 clc_forest_prop gaussian identity   none 0.5050512
## 
## [[5]]
##          Response   family     link method R.squared
## 1 tcd_forest_prop gaussian identity   none 0.4947636
## 
## [[6]]
##          Response   family     link method R.squared
## 1 clc_forest_prop gaussian identity   none 0.1723261
## 
## [[7]]
##          Response   family     link method R.squared
## 1 tcd_forest_prop gaussian identity   none 0.3505672
## 
## [[8]]
##          Response   family     link method R.squared
## 1 clc_forest_prop gaussian identity   none 0.2259531

names(rsq_model) <-  c('tcd_gps-roe','clc_gps-roe','tcd_kde-roe','clc_kde-roe',
                       'tcd_gps-red','clc_gps-red','tcd_kde-red','clc_kde-red')

rsq_rand_vs_model <- list(data.frame())
for(i in 1:length(models)){
  rsq_rand_vs_model[[i]] <- cbind(rsq_model[[i]])
}
names(rsq_rand_vs_model) <-  c('tcd_gps-roe','clc_gps-roe','tcd_kde-roe','clc_kde-roe',
                               'tcd_gps-red','clc_gps-red','tcd_kde-red','clc_kde-red')

rsq_rand_vs_model <- do.call(rbind.data.frame, rsq_rand_vs_model)
colnames(rsq_rand_vs_model) <- paste0(colnames(rsq_rand_vs_model), c('-model','-model'))
rsq_rand_vs_model$`R.squared-model` <- round(rsq_rand_vs_model$`R.squared-model`,4)

#write.csv(rsq_rand_vs_model, '../results/rsq_models.csv')


#### E4.3 model results table ####

datasets <- list(sub_roe_point,sub_roe_point,sub_roe_kernel,sub_roe_kernel,
                 sub_red_point,sub_red_point,sub_red_kernel,sub_red_kernel)
model_names <- c('GPS-TCD','GPS-CLC','KDE-TCD','KDE-CLC','GPS-TCD','GPS-CLC','KDE-TCD','KDE-CLC')
models_summary <- lapply(models, function(x) summary(x))
models_coef <- lapply(models_summary, function(x) as.data.frame(x$tTable[,c(1,2,4)]))
for(i in 1:length(models_coef))
{
  colnames(models_coef[[i]]) <- c('Estimate','se','sig')
  models_coef[[i]] <- round(models_coef[[i]],2) 
  #  models_coef[[i]]$sig <- as.data.frame(models_coef[[i]])$sig > 0.1
  models_coef[[i]][which(models_coef[[i]]$sig <= 0.1),'sig'] <- NA
  models_coef[[i]][which(models_coef[[i]]$sig > 0.1),'sig'] <- '(NS)'
  models_coef[[i]]$model <- c(model_names[i])
  models_coef[[i]]$estimate <- paste0(models_coef[[i]]$Estimate,'+/-',models_coef[[i]]$se,models_coef[[i]]$sig)
  models_coef[[i]]$species <- c(c('roe','roe','roe','roe','red','red','red','red')[i])
  models_coef[[i]] <- models_coef[[i]][,c('species','estimate')]
  colnames(models_coef[[i]]) <- c(model_names[i],model_names[i]) 
}

# GPS KDE roe 
roe_models <- cbind(models_coef[[1]],models_coef[[2]],models_coef[[3]],models_coef[[4]])[,c(1,seq(2,8,2))]
# GPS KDE red 
#models_coef[[8]]<- rbind(models_coef[[8]][1,],c(NA,NA,NA),models_coef[[8]][2:5,])
red_models <- cbind(models_coef[[5]],models_coef[[6]],models_coef[[7]],models_coef[[8]])[,c(1,seq(2,8,2))]
(roe_red_models <- rbind(roe_models, red_models))

##                  GPS-TCD        GPS-TCD.1          GPS-CLC        KDE-TCD
## (Intercept)          roe    0.39+/-0.03NA    0.24+/-0.03NA  0.28+/-0.03NA
## timenight            roe   -0.27+/-0.03NA   -0.19+/-0.03NA -0.09+/-0.02NA
## populationroe_25     roe    0.06+/-0.03NA     0.2+/-0.04NA  0.07+/-0.03NA
## populationroe_2      roe    0.44+/-0.03NA    0.61+/-0.04NA   0.5+/-0.04NA
## populationroe_15     roe     0+/-0.03(NS)    0.12+/-0.05NA   0+/-0.03(NS)
## populationroe_1      roe     0.5+/-0.04NA    0.46+/-0.05NA  0.44+/-0.04NA
## (Intercept)1         red    0.73+/-0.03NA    0.62+/-0.05NA  0.73+/-0.02NA
## timenight1           red   -0.31+/-0.02NA   -0.15+/-0.04NA -0.05+/-0.02NA
## populationred_3      red -0.04+/-0.04(NS)  0.01+/-0.08(NS)  -0.1+/-0.04NA
## populationred_21     red    0.23+/-0.03NA    0.13+/-0.06NA  0.19+/-0.03NA
## populationred_17     red    0.17+/-0.03NA    0.26+/-0.05NA  0.09+/-0.03NA
## populationred_14     red  0.05+/-0.04(NS) -0.02+/-0.06(NS) -0.16+/-0.04NA
##                           KDE-CLC
## (Intercept)         0.16+/-0.03NA
## timenight          -0.09+/-0.03NA
## populationroe_25    0.22+/-0.03NA
## populationroe_2     0.64+/-0.04NA
## populationroe_15    0.15+/-0.04NA
## populationroe_1     0.43+/-0.05NA
## (Intercept)1        0.66+/-0.04NA
## timenight1       -0.04+/-0.03(NS)
## populationred_3  -0.08+/-0.05(NS)
## populationred_21    0.14+/-0.05NA
## populationred_17    0.18+/-0.04NA
## populationred_14   -0.14+/-0.05NA

#write.csv(roe_red_models,'../results/models_roe_red_round.csv')


#### E4.4 Predictions ####

# Preparation 

# data frames to define order of the population in the plot 
# red
(red_pops_or <- data.frame(ord= c(1,2,3,4,5), 
                           population=c('red_3','red_14','red_8','red_21','red_17'), 
                           popname= c('N-Italy','N-Germany','SE-Germany','SE-Belgium','SW-Belgium'),
                           stringsAsFactors = FALSE))

##   ord population    popname
## 1   1      red_3    N-Italy
## 2   2     red_14  N-Germany
## 3   3      red_8 SE-Germany
## 4   4     red_21 SE-Belgium
## 5   5     red_17 SW-Belgium

# roe
(roe_pops_or <-data.frame(ord=1:5, 
                          population=c('roe_8','roe_15','roe_25','roe_1','roe_2'),
                          popname=c('SW-France','SW-Germany','Switzerland','N-Italy','SE-Germany'),
                          stringsAsFactors = FALSE))

##   ord population     popname
## 1   1      roe_8   SW-France
## 2   2     roe_15  SW-Germany
## 3   3     roe_25 Switzerland
## 4   4      roe_1     N-Italy
## 5   5      roe_2  SE-Germany

# repeat the dataframe so to use them accordingly in the loop 
roered_popsor <- list(roe_pops_or,roe_pops_or,roe_pops_or,roe_pops_or,red_pops_or,red_pops_or,red_pops_or,red_pops_or)

# predictions 
datasets <- list(sub_roe_point,sub_roe_point,sub_roe_kernel,sub_roe_kernel,sub_red_point,sub_red_point,sub_red_kernel,sub_red_kernel)
res <- list(data.frame())
for(i in 1:length(models)){
  # create new datset with all combinations of populations and day/night
  newdata = unique(datasets[[i]][,c('population','time')])
  # predict using the type response and include se.fit
  pr <- predict(models[[i]], newdata, type="response",se.fit=TRUE)
  # add the fit and se to the newdata dataframe 
  newdata$pred <- pr$fit
  newdata$se.pred <- pr$se.fit
  # get the family of the model 
  family <- family(models[[i]])
  # perform linkinv to get the upper and lower confidence levels 
  newdata$lower <- family$linkinv(pr$fit - qnorm(0.95) * pr$se.fit)   
  newdata$upper <- family$linkinv(pr$fit + qnorm(0.95) * pr$se.fit)
  # calculate the probabilities from the predictions 
  newdata$probs <- exp(newdata$pred)/(1+exp(newdata$pred)) #gives you probability that y=1 for each observation
  # join the population table created above (roered_popsor)
  newdata<- plyr::join(newdata, roered_popsor[[i]],type='left',by='population')
  # sort based on the column ord (order) and time 
  res[[i]]<- newdata[order(newdata$ord,newdata$time),]
}

modnames <-  c('roe_gps_tcd','roe_gps_clc','roe_kde_tcd','roe_kde_clc','red_gps_tcd','red_gps_clc','red_kde_tcd','red_kde_clc')
names(res) <- modnames

# increase in proportional use between night and day 
dvn <- list()
nvd <- list()
for(i in 1:length(res)){
  dvn[[i]] <- (subset(res[[i]], time == 'day')$pred ) / (subset(res[[i]],time=='night')$pred)
  nvd[[i]] <- (subset(res[[i]], time == 'night')$pred ) / (subset(res[[i]],time=='day')$pred)
}
names(dvn) <- modnames 
names(nvd) <- modnames 

dvn_mean <- lapply(dvn, function(x) mean(x))
dvn_sd <- lapply(dvn, function(x) sd(x))
dvn_min <- lapply(dvn, function(x) min(x))
dvn_max <- lapply(dvn, function(x) max(x))

nvd_mean <- lapply(nvd, function(x) mean(x))
nvd_sd <- lapply(nvd, function(x) sd(x))
nvd_min <- lapply(nvd, function(x) min(x))
nvd_max <- lapply(nvd, function(x) max(x))

dvnnvd <- as.data.frame(unlist(dvn_mean))
colnames(dvnnvd) <- 'mean_dvn'
dvnnvd$sd_dvn <- as.vector(unlist(dvn_sd))
dvnnvd$min_dvn <- as.vector(unlist(dvn_min))
dvnnvd$max_dvn <- as.vector(unlist(dvn_max))

dvnnvd$mean_nvd <- 1 - as.vector(unlist(nvd_mean))
dvnnvd$sd_nvd <- as.vector(unlist(nvd_sd))
dvnnvd$min_nvd <- 1 - as.vector(unlist(nvd_min))
dvnnvd$max_nvd <- 1 - as.vector(unlist(nvd_max))

#write.csv(dvnnvd, '../results/difference_between_dayandnight.csv')

#### E4.5 Figure 4: Predictions Plot ####

res <- res[c(5:8,1:4)]
best <- best[c(5:8,1:4),]
plot_predictions()

# export(func=plot_predictions(),file="../results/FIGURE4",res=600, ratio=2, type='pdf')
# export(func=plot_predictions(),file="../results/FIGURE4",res=600, ratio=2, type='png')
# export(func=plot_predictions(),file="../results/FIGURE4",res=600, ratio=2, type='tif')