library(rpostgis)
library(plyr)
library(RPostgreSQL)
library(adehabitatHR)
library(raster)
library(sf)
library(sp)
library(RgoogleMaps)
library(png)
library(ggplot2)
library(gridExtra)
library(grid)
library(mgcv)
library(glmmTMB)
library(RCurl)
library(lme4)
library(nlme)
library(MuMIn)
library(piecewiseSEM)
library(Hmisc)
'%!in%' <- function(x,y)!('%in%'(x,y)) # create 'not in' operator
lapply(list.files(pattern= '^1.*', path='./fncs/',full.names=TRUE), source)
source(list.files(pattern= '^00.*', path='./fncs/',full.names=TRUE))
point_data <- read.csv(paste0('../data/1_mismatch_and_modelling/gps_data.csv'), stringsAsFactors=FALSE)
kernel_data <- read.csv(paste0('../data/1_mismatch_and_modelling/kde_data.csv'), stringsAsFactors=FALSE)
Data set with absolute number and proportion of locations classified as forest/open habitat by TCD and CLC for every individual’s day and night locations
Data set with absolute number and proportion of cells within the KDE classified as forest/open habitat by TCD and CLC for every individual’s day and night home range
# add aniyr species column
point_data$aniyr_species <- paste0(point_data$aniyr,'_',point_data$species)
kernel_data$aniyr_species <- paste0(kernel_data$aniyr,'_',kernel_data$species)
# rename columns so that kernel and point column names are identical
colnames(kernel_data)[10:ncol(kernel_data)] <- colnames(point_data)[10:ncol(point_data)]
aniyr_species <- kernel_data$aniyr_species
kernel_data <- cbind(aniyr_species,kernel_data[,2:(ncol(kernel_data)-1)])
aniyr_species <- point_data$aniyr_species
point_data <- cbind(aniyr_species,point_data[,2:(ncol(point_data)-1)])
# add column for year
point_data$yearx <- gsub('[0-9]*._','', point_data$aniyr)
kernel_data$yearx <- gsub('[0-9]*._','', kernel_data$aniyr)
# Check which animals have only one observation (day or night)
remove <-unlist(as.vector(subset(plyr::count(kernel_data$aniyr),freq==1)[,'x']))
kernel_data <- subset(kernel_data, aniyr %!in% remove)
point_data <- point_data[which(point_data$aniyr_species %in% unique(kernel_data$aniyr_species)),] # remove animals that were not included in the kernel calculations
# split in order to join both day and night
# ROE DEER
# KERNEL
roe_kernel <- subset(kernel_data, species == 'roe') # subset roe
roed_kernel <-roe_kernel[which(roe_kernel$time == 'day'),] # subset roe during daytime
roen_kernel <- roe_kernel[which(roe_kernel$time == 'night'),] # subset roe during nightime
colnames(roed_kernel)[7:length(colnames(roed_kernel))] <- paste0(colnames(roed_kernel)[7:length(colnames(roed_kernel))],'_d') # add _d to the column names of daylight dataset
colnames(roen_kernel)[7:length(colnames(roen_kernel))] <- paste0(colnames(roen_kernel)[7:length(colnames(roen_kernel))],'_n') # add _n to the column names of daylight dataset
roej_kernel <- plyr::join(roed_kernel,roen_kernel, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id')) # if a record for day or night is missing for a deer it is excluded - hence INNER join
# POINTS
roe_points <- subset(point_data, species == 'roe') # subset roe
roed_points <-roe_points[which(roe_points$time == 'day'),] # subset roe during daytime
roen_points <- roe_points[which(roe_points$time == 'night'),] # subset roe during nightime
colnames(roed_points)[8:length(colnames(roed_points))] <- paste0(colnames(roed_points)[8:length(colnames(roed_points))],'_d') # add _d to the column names of daylight dataset
colnames(roen_points)[8:length(colnames(roen_points))] <- paste0(colnames(roen_points)[8:length(colnames(roen_points))],'_n') # add _n to the column names of daylight dataset
roej_points <- plyr::join(roed_points,roen_points, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id','sex')) # if a record for day or night is missing for a deer it is excluded - hence INNER join
# RED DEER
# KERNEL
red_kernel <- subset(kernel_data, species == 'red') # subset red
redd_kernel <-red_kernel[which(red_kernel$time == 'day'),] # subset roe during daytime
redn_kernel <- red_kernel[which(red_kernel$time == 'night'),] # subset roe during nightime
colnames(redd_kernel)[7:length(colnames(redd_kernel))] <- paste0(colnames(redd_kernel)[7:length(colnames(redd_kernel))],'_d') # add _d to the column names of daylight dataset
colnames(redn_kernel)[7:length(colnames(redn_kernel))] <- paste0(colnames(redn_kernel)[7:length(colnames(redn_kernel))],'_n') # add _n to the column names of daylight dataset
redj_kernel <- plyr::join(redd_kernel,redn_kernel, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id')) # if a record for day or night is missing for a deer it is excluded - hence INNER join
# POINTS
red_points <- subset(point_data, species == 'red') # subset red
redd_points <-red_points[which(red_points$time == 'day'),] # subset roe during daytime
redn_points <- red_points[which(red_points$time == 'night'),] # subset roe during nightime
colnames(redd_points)[8:length(colnames(redd_points))] <- paste0(colnames(redd_points)[8:length(colnames(redd_points))],'_d') # add _d to the column names of daylight dataset
colnames(redn_points)[8:length(colnames(redn_points))] <- paste0(colnames(redn_points)[8:length(colnames(redn_points))],'_n') # add _n to the column names of daylight dataset
redj_points <- plyr::join(redd_points,redn_points, 'inner', by=c('aniyr_species','species','population','study_areas_id','aniyr','animals_id','sex')) # if a record for day or night is missing for a deer it is excluded - hence INNER join
# fixes per species
roe_fixes <- sum(roej_points$total_n + roej_points$total_d)
red_fixes <- sum(redj_points$total_n + redj_points$total_d)
# fixes in total
sum(roe_fixes,red_fixes)
## [1] 160261
# number of animals per population
# ROE DEER
(nroe_animals_pop <- plyr::count(unique(roe_points[,c('animals_id','population')])$population))
## x freq
## 1 1 17
## 2 2 42
## 3 8 149
## 4 15 24
## 5 25 40
(nroe_aniyrs_pop <- plyr::count(unique(roe_points[,c('aniyr','population')])[,'population']))
## x freq
## 1 1 19
## 2 2 49
## 3 8 165
## 4 15 30
## 5 25 66
(freqroe_ani_aniyr <- plyr::join(nroe_animals_pop, nroe_aniyrs_pop, type='inner', by='x'))
## x freq freq
## 1 1 17 19
## 2 2 42 49
## 3 8 149 165
## 4 15 24 30
## 5 25 40 66
colnames(freqroe_ani_aniyr) <- c('population','n_animals','n_aniyrs')
# RED DEER
(nred_animals_pop <- plyr::count(unique(red_points[,c('animals_id','population')])$population))
## x freq
## 1 3 8
## 2 8 60
## 3 14 17
## 4 17 18
## 5 21 14
(nred_aniyrs_pop <- plyr::count(unique(red_points[,c('aniyr','population')])[,'population']))
## x freq
## 1 3 10
## 2 8 69
## 3 14 29
## 4 17 26
## 5 21 23
(freqred_ani_aniyr <- plyr::join(nred_animals_pop, nred_aniyrs_pop, type='inner', by='x'))
## x freq freq
## 1 3 8 10
## 2 8 60 69
## 3 14 17 29
## 4 17 18 26
## 5 21 14 23
colnames(freqred_ani_aniyr) <- c('population','n_animals','n_aniyrs')
# animals per population per year
(roe_per_year <- (plyr::count(roe_points[,c('population','yearx')])))
## population yearx freq
## 1 1 2005 16
## 2 1 2006 14
## 3 1 2007 8
## 4 2 2006 14
## 5 2 2007 8
## 6 2 2008 2
## 7 2 2010 42
## 8 2 2011 28
## 9 2 2012 2
## 10 2 2013 2
## 11 8 2003 14
## 12 8 2004 4
## 13 8 2005 26
## 14 8 2006 34
## 15 8 2007 34
## 16 8 2008 36
## 17 8 2009 32
## 18 8 2010 32
## 19 8 2011 52
## 20 8 2012 28
## 21 8 2013 38
## 22 15 2010 8
## 23 15 2011 16
## 24 15 2012 24
## 25 15 2013 12
## 26 25 2012 26
## 27 25 2013 72
## 28 25 2014 34
(red_per_year <- (plyr::count(red_points[,c('population','yearx')])))
## population yearx freq
## 1 3 2005 2
## 2 3 2006 4
## 3 3 2008 2
## 4 3 2010 4
## 5 3 2011 8
## 6 8 2002 4
## 7 8 2003 6
## 8 8 2004 4
## 9 8 2005 4
## 10 8 2006 6
## 11 8 2007 8
## 12 8 2008 28
## 13 8 2009 16
## 14 8 2010 4
## 15 8 2011 26
## 16 8 2012 4
## 17 8 2013 28
## 18 14 2008 2
## 19 14 2009 10
## 20 14 2010 28
## 21 14 2011 12
## 22 14 2012 2
## 23 14 2013 4
## 24 17 2002 6
## 25 17 2004 2
## 26 17 2005 2
## 27 17 2009 12
## 28 17 2010 16
## 29 17 2012 2
## 30 17 2013 6
## 31 17 2014 6
## 32 21 2001 4
## 33 21 2003 2
## 34 21 2004 2
## 35 21 2005 6
## 36 21 2006 2
## 37 21 2009 4
## 38 21 2010 14
## 39 21 2011 6
## 40 21 2013 2
## 41 21 2014 4
# only keep the most prevalent year in some populations to minimize pseudo-replication
# ROE DEER
france_roe <- unique(subset(roe_points, population == 8 & yearx == 2011)[,'animals_id'])
bavaria_roe <- unique(subset(roe_points, population == 2 & yearx == 2010)[,'animals_id'])
swiss_roe <- unique(subset(roe_points, population == 25 & yearx == 2014)[,'animals_id'])
germany_roe <- unique(subset(roe_points, population == 15)[,'animals_id'])
italy_roe <- unique(subset(roe_points, population == 1)[,'animals_id'])
subset_roe <- subset(roe_points, animals_id %in% c(france_roe,bavaria_roe,swiss_roe, germany_roe, italy_roe))
aniyrs_subset_roe <- plyr::count(unique(subset_roe[,c('animals_id','yearx','population')])[,'population'])
animals_subset_roe <- plyr::count(unique(subset_roe[,c('animals_id','population')])[,'population'])
subset_sum_roe <- plyr::join(aniyrs_subset_roe, animals_subset_roe, by ='x',type='left')
subset_sum_roe$diff <- subset_sum_roe[,2]-subset_sum_roe[,3]
subset_sum_roe
## x freq freq diff
## 1 1 19 17 2
## 2 2 27 21 6
## 3 8 33 26 7
## 4 15 30 24 6
## 5 25 32 17 15
(nr_aniyrs_roe <- sum(subset_sum_roe[,2]))
## [1] 141
(nr_anis_roe <- sum(subset_sum_roe[,3]))
## [1] 105
# RED DEER
italy_red <- unique(subset(red_points, population == 3)[,'animals_id'])
bavaria_red <- unique(subset(red_points, population == 8 & yearx %in% c(2011,2012,2013))[,'animals_id'])
germany_red <- unique(subset(red_points, population == 14)[,'animals_id'])
belgium_a_red <- unique(subset(red_points, population == 17)[,'animals_id'])
belgium_b_red <- unique(subset(red_points, population == 21)[,'animals_id'])
subset_red <- subset(red_points, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
aniyrs_subset_red <- plyr::count(unique(subset_red[,c('animals_id','yearx','population')])[,'population'])
animals_subset_red <- plyr::count(unique(subset_red[,c('animals_id','population')])[,'population'])
subset_sum_red <- plyr::join(aniyrs_subset_red, animals_subset_red, by ='x',type='left')
subset_sum_red$diff <- subset_sum_red[,2]-subset_sum_red[,3]
subset_sum_red
## x freq freq diff
## 1 3 10 8 2
## 2 8 29 28 1
## 3 14 29 17 12
## 4 17 26 18 8
## 5 21 23 14 9
(nr_aniyrs_red <- sum(subset_sum_red[,2]))
## [1] 117
(nr_anis_red <- sum(subset_sum_red[,3]))
## [1] 85
# filter animals that are kept for further analysis
roe_points <- subset(roe_points, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe))
red_points <- subset(red_points, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
roe_kernel <- subset(roe_kernel, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe))
red_kernel <- subset(red_kernel, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
roej_points <- subset(roej_points, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe))
redj_points <- subset(redj_points, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
roej_kernel <- subset(roej_kernel, animals_id %in% c(france_roe,bavaria_roe,swiss_roe,germany_roe,italy_roe))
redj_kernel <- subset(redj_kernel, animals_id %in% c(italy_red,bavaria_red,germany_red,belgium_a_red,belgium_b_red))
# number of remaining gps points
(roenr_points <- sum(roe_points$tcd_open_abs) + sum(roe_points$tcd_forest_abs))
## [1] 45529
(rednr_points <- sum(red_points$tcd_open_abs) + sum(red_points$tcd_forest_abs))
## [1] 39522
# number of remaining roe and red deer per year per population
(roe_per_year <- (plyr::count(unique(roe_points[,c('animals_id','population','yearx')])[,c('population','yearx')])))
## population yearx freq
## 1 1 2005 8
## 2 1 2006 7
## 3 1 2007 4
## 4 2 2006 1
## 5 2 2007 1
## 6 2 2010 21
## 7 2 2011 4
## 8 8 2006 1
## 9 8 2008 1
## 10 8 2009 2
## 11 8 2010 1
## 12 8 2011 26
## 13 8 2012 1
## 14 8 2013 1
## 15 15 2010 4
## 16 15 2011 8
## 17 15 2012 12
## 18 15 2013 6
## 19 25 2013 15
## 20 25 2014 17
(red_per_year <- (plyr::count(unique(red_points[,c('animals_id','population','yearx')])[,c('population','yearx')])))
## population yearx freq
## 1 3 2005 1
## 2 3 2006 2
## 3 3 2008 1
## 4 3 2010 2
## 5 3 2011 4
## 6 8 2011 13
## 7 8 2012 2
## 8 8 2013 14
## 9 14 2008 1
## 10 14 2009 5
## 11 14 2010 14
## 12 14 2011 6
## 13 14 2012 1
## 14 14 2013 2
## 15 17 2002 3
## 16 17 2004 1
## 17 17 2005 1
## 18 17 2009 6
## 19 17 2010 8
## 20 17 2012 1
## 21 17 2013 3
## 22 17 2014 3
## 23 21 2001 2
## 24 21 2003 1
## 25 21 2004 1
## 26 21 2005 3
## 27 21 2006 1
## 28 21 2009 2
## 29 21 2010 7
## 30 21 2011 3
## 31 21 2013 1
## 32 21 2014 2
# bind roe and red deer datasets
POINT_DATA <- rbind(roe_points,red_points)
KERNEL_DATA <- rbind(roe_kernel,red_kernel)
# write.csv(POINT_DATA, '../data/1_mismatch_and_modelling/gps_for_model.csv')
# write.csv(KERNEL_DATA,'../data/1_mismatch_and_modelling/kde_for_model.csv')
Classification mismatch between TCD and CLC in red and roe deer GPS locations
# absolute
(tot_roe <-colSums(roe_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")]))
## clc0tcd1 clc1tcd0 clc0tcd0 clc1tcd1
## 4637 3657 24667 12568
(tot_red <-colSums(red_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")]))
## clc0tcd1 clc1tcd0 clc0tcd0 clc1tcd1
## 5219 3740 9274 21289
# proportion
(prop_roe <-colSums(roe_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")])/sum(tot_roe))
## clc0tcd1 clc1tcd0 clc0tcd0 clc1tcd1
## 0.10184717 0.08032243 0.54178655 0.27604384
(prop_red <-colSums(red_points[,c("clc0tcd1","clc1tcd0","clc0tcd0","clc1tcd1")])/sum(tot_red))
## clc0tcd1 clc1tcd0 clc0tcd0 clc1tcd1
## 0.13205303 0.09463084 0.23465412 0.53866201
## Total mismatch
# absolute
(tot_roe_mismatch<-sum(tot_roe[1:2]))
## [1] 8294
(prop_roe_mismatch<-sum(prop_roe[1:2]))
## [1] 0.1821696
# proportion
(tot_red_mismatch<-sum(tot_red[1:2]))
## [1] 8959
(prop_red_mismatch<-sum(prop_red[1:2]))
## [1] 0.2266839
## Proportion of forest and open according to clc and tcd
(forest_roe_clc <- sum(prop_roe[c('clc1tcd0','clc1tcd1')]))*100
## [1] 35.63663
(forest_roe_tcd <- sum(prop_roe[c('clc0tcd1','clc1tcd1')]))*100
## [1] 37.7891
(forest_red_clc <- sum(prop_red[c('clc1tcd0','clc1tcd1')]))*100
## [1] 63.32928
(forest_red_tcd <- sum(prop_red[c('clc0tcd1','clc1tcd1')]))*100
## [1] 67.0715
res2=300
dims=480/72*res2
# are there identical animal years for roe and red?
length(unique(c(roej_points$aniyr,redj_points$aniyr))) == length(c(roej_points$aniyr,redj_points$aniyr))
## [1] TRUE
length(unique(c(roej_points$aniyr,redj_points$aniyr)))
## [1] 258
length(c(roej_points$aniyr,redj_points$aniyr))
## [1] 258
aniyr <- c(roej_points$aniyr,redj_points$aniyr) # note: there are no common id.year in roe and red deer
#### D2.1 Figure: proportion of forest ####
## Proportion of forest for roe and red deer during day and night by TCD and CLC ##
proportion_plot()
# export(func=proportion_plot(),file="../results/boxplots_proportion_of_forest_CLC_TCD_F",res=300, ratio=1.5, type='pdf')
# export(func=proportion_plot(),file="../results/boxplots_proportion_of_forest_CLC_TCD_F",res=300, ratio=1.5, type='png')
# export(func=proportion_plot(),file="../results/boxplots_proportion_of_forest_CLC_TCD_F",res=300, ratio=1.5, type='tif')
boxplot_mismatch()
# export(func=boxplot_mismatch(withbox=TRUE),file="../results/boxplots_mismatch_CLC_TCD_withcolbox",res=300, ratio=1.5, type='pdf')
# export(func=boxplot_mismatch(withbox=TRUE),file="../results/boxplots_mismatch_CLC_TCD_withcolbox",res=300, ratio=1.5, type='png')
# export(func=boxplot_mismatch(withbox=TRUE),file="../results/boxplots_mismatch_CLC_TCD_withcolbox",res=300, ratio=1.5, type='tif')
model with forest proportion as response variable
#setwd('/Users/jedgroev/OneDrive - UvA/1_FEM_CRI/PAPERS/MARCO/')
point_data <- read.csv(paste0('../data/1_mismatch_and_modelling/gps_for_model.csv'), stringsAsFactors = FALSE)
kernel_data <- read.csv(paste0('../data/1_mismatch_and_modelling/kde_for_model.csv'), stringsAsFactors = FALSE)
# convert to factors
point_data$animals_id_fact <- as.factor(point_data$animals_id)
point_data$population <- factor(paste0(point_data$species,'_',point_data$study_areas_id), levels = unique(sort(paste0(point_data$species,'_',point_data$study_areas_id),decreasing = TRUE)))
point_data$time <- factor(point_data$time)
kernel_data$animals_id_fact <- as.factor(kernel_data$animals_id)
kernel_data$population <- factor(paste0(kernel_data$species,'_',kernel_data$study_areas_id), levels = unique(sort(paste0(kernel_data$species,'_',kernel_data$study_areas_id),decreasing = TRUE)))
kernel_data$time <- factor(kernel_data$time)
# data frames to define order of the population in the plot
# RED DEER
(red_pops_or <- data.frame(ord= 6:10,
population=c('red_3','red_14','red_8','red_21','red_17'),
popname= c('N-Italy', 'N-Germany','SE-Germany', 'SE-Belgium', 'SW-Belgium'),
stringsAsFactors = FALSE))
## ord population popname
## 1 6 red_3 N-Italy
## 2 7 red_14 N-Germany
## 3 8 red_8 SE-Germany
## 4 9 red_21 SE-Belgium
## 5 10 red_17 SW-Belgium
# ROE DEER
(roe_pops_or <-data.frame(ord=1:5,
population=c('roe_8','roe_15','roe_25','roe_1','roe_2'),
popname=c('SW-France', 'SW-Germany', 'Switzerland', 'N-Italy', 'SE-Germany'),
stringsAsFactors = FALSE))
## ord population popname
## 1 1 roe_8 SW-France
## 2 2 roe_15 SW-Germany
## 3 3 roe_25 Switzerland
## 4 4 roe_1 N-Italy
## 5 5 roe_2 SE-Germany
roered_pops_df <- rbind(roe_pops_or,red_pops_or)
point_data <- plyr::join(point_data, roered_pops_df, type='left', by='population')
kernel_data <- plyr::join(kernel_data, roered_pops_df, type='left', by='population')
#### Figure: Proportion of forest according to tcd and clc in day and night ####
par(mfrow=c(2,1), mar=c(10,4,2,2))
prop <- aggregate(. ~ popname + time ,data=point_data[, c('popname','total','time','tcd_forest_abs','clc_forest_abs','tcd_open_abs','clc_open_abs')],sum)
prop$tcd_forest_abs <- prop$tcd_forest_abs/prop$total
prop$clc_forest_abs <- prop$clc_forest_abs/prop$total
prop$tcd_open_abs <- prop$tcd_open_abs/prop$total
prop$clc_open_abs <- prop$clc_open_abs/prop$total
prop <- plyr::join(prop,roered_pops_df[,c('ord','popname')], type='left', by='popname')
prop <- prop[order(prop$ord, prop$time),]
a <- t(as.matrix(prop[,c('tcd_forest_abs','tcd_open_abs')]))
b <- t(as.matrix(prop[,c('clc_forest_abs','clc_open_abs')]))
colnames(a) <- paste0(prop$popname, '_',prop$time)
colnames(b) <- paste0(prop$popname, '_',prop$time)
barplot(a, las=2,main='tcd', space=c(0,rep(c(0,0.5),10))[1:20])
barplot(b, las=2,main='clc', space=c(0,rep(c(0,0.5),10))[1:20])
#### Figure: confusion matrix as barplots (per population and day vs night) ####
par(mfrow=c(1,1), mar=c(10,4,2,0))
prop <- aggregate(. ~ popname + time ,
data=point_data[, c('popname','total','time','clc0tcd0','clc1tcd0','clc1tcd1','clc0tcd1')],sum)
prop$clc0tcd0 <- prop$clc0tcd0/prop$total
prop$clc1tcd0 <- prop$clc1tcd0/prop$total
prop$clc1tcd1 <- prop$clc1tcd1/prop$total
prop$clc0tcd1 <- prop$clc0tcd1/prop$total
prop <- plyr::join(prop,roered_pops_df[,c('ord','popname')], type='left', by='popname')
prop <- prop[order(prop$ord, prop$time),]
a <- t(as.matrix(prop[,c('clc1tcd1','clc0tcd1','clc0tcd0','clc1tcd0')]))
colnames(a) <- paste0(prop$popname, '_',prop$time)
barplot(a,
las=2,
main='tcd',
space=c(0,rep(c(0,0.5),10))[1:20],
col=c('forestgreen','blue','darkolivegreen1','lightblue'))
#### Figure: Bar and boxplots of forest proportion ####
par(mfrow=c(1,1), mar=c(6,0,2,0))
forest_proportion_barboxplot()
# export(func=forest_proportion_barboxplot(),file="../results/forest_proportion_barboxplot",res=300, ratio=1.5, type='pdf')
# export(func=forest_proportion_barboxplot(),file="../results/forest_proportion_barboxplot",res=300, ratio=1.5, type='png')
# export(func=forest_proportion_barboxplot(),file="../results/forest_proportion_barboxplot",res=300, ratio=1.5, type='tif')
# make a subset for each species and method (KDE/TCD)
sub_roe_point <- subset(point_data,species=='roe')
sub_red_point <- subset(point_data,species=='red')
sub_roe_kernel <- subset(kernel_data,species=='roe')
sub_red_kernel <- subset(kernel_data,species=='red')
# remove unnecessary factor levels
sub_roe_point[] <- lapply(sub_roe_point, function(x) if(is.factor(x)) factor(x) else x)
sub_red_point[] <- lapply(sub_red_point, function(x) if(is.factor(x)) factor(x) else x)
sub_roe_kernel[] <- lapply(sub_roe_kernel, function(x) if(is.factor(x)) factor(x) else x)
sub_red_kernel[] <- lapply(sub_red_kernel, function(x) if(is.factor(x)) factor(x) else x)
#### E4.1 models and model selection ####
mods <- model_selection()
models <- mods[[1]]
best <- mods[[2]]
#### E4.2 rsquares ####
(rsq_model <- lapply(models, function(x) rsquared(x)))
## [[1]]
## Response family link method R.squared
## 1 tcd_forest_prop gaussian identity none 0.5615685
##
## [[2]]
## Response family link method R.squared
## 1 clc_forest_prop gaussian identity none 0.4454718
##
## [[3]]
## Response family link method R.squared
## 1 tcd_forest_prop gaussian identity none 0.5674175
##
## [[4]]
## Response family link method R.squared
## 1 clc_forest_prop gaussian identity none 0.5050512
##
## [[5]]
## Response family link method R.squared
## 1 tcd_forest_prop gaussian identity none 0.4947636
##
## [[6]]
## Response family link method R.squared
## 1 clc_forest_prop gaussian identity none 0.1723261
##
## [[7]]
## Response family link method R.squared
## 1 tcd_forest_prop gaussian identity none 0.3505672
##
## [[8]]
## Response family link method R.squared
## 1 clc_forest_prop gaussian identity none 0.2259531
names(rsq_model) <- c('tcd_gps-roe','clc_gps-roe','tcd_kde-roe','clc_kde-roe',
'tcd_gps-red','clc_gps-red','tcd_kde-red','clc_kde-red')
rsq_rand_vs_model <- list(data.frame())
for(i in 1:length(models)){
rsq_rand_vs_model[[i]] <- cbind(rsq_model[[i]])
}
names(rsq_rand_vs_model) <- c('tcd_gps-roe','clc_gps-roe','tcd_kde-roe','clc_kde-roe',
'tcd_gps-red','clc_gps-red','tcd_kde-red','clc_kde-red')
rsq_rand_vs_model <- do.call(rbind.data.frame, rsq_rand_vs_model)
colnames(rsq_rand_vs_model) <- paste0(colnames(rsq_rand_vs_model), c('-model','-model'))
rsq_rand_vs_model$`R.squared-model` <- round(rsq_rand_vs_model$`R.squared-model`,4)
#write.csv(rsq_rand_vs_model, '../results/rsq_models.csv')
#### E4.3 model results table ####
datasets <- list(sub_roe_point,sub_roe_point,sub_roe_kernel,sub_roe_kernel,
sub_red_point,sub_red_point,sub_red_kernel,sub_red_kernel)
model_names <- c('GPS-TCD','GPS-CLC','KDE-TCD','KDE-CLC','GPS-TCD','GPS-CLC','KDE-TCD','KDE-CLC')
models_summary <- lapply(models, function(x) summary(x))
models_coef <- lapply(models_summary, function(x) as.data.frame(x$tTable[,c(1,2,4)]))
for(i in 1:length(models_coef))
{
colnames(models_coef[[i]]) <- c('Estimate','se','sig')
models_coef[[i]] <- round(models_coef[[i]],2)
# models_coef[[i]]$sig <- as.data.frame(models_coef[[i]])$sig > 0.1
models_coef[[i]][which(models_coef[[i]]$sig <= 0.1),'sig'] <- NA
models_coef[[i]][which(models_coef[[i]]$sig > 0.1),'sig'] <- '(NS)'
models_coef[[i]]$model <- c(model_names[i])
models_coef[[i]]$estimate <- paste0(models_coef[[i]]$Estimate,'+/-',models_coef[[i]]$se,models_coef[[i]]$sig)
models_coef[[i]]$species <- c(c('roe','roe','roe','roe','red','red','red','red')[i])
models_coef[[i]] <- models_coef[[i]][,c('species','estimate')]
colnames(models_coef[[i]]) <- c(model_names[i],model_names[i])
}
# GPS KDE roe
roe_models <- cbind(models_coef[[1]],models_coef[[2]],models_coef[[3]],models_coef[[4]])[,c(1,seq(2,8,2))]
# GPS KDE red
#models_coef[[8]]<- rbind(models_coef[[8]][1,],c(NA,NA,NA),models_coef[[8]][2:5,])
red_models <- cbind(models_coef[[5]],models_coef[[6]],models_coef[[7]],models_coef[[8]])[,c(1,seq(2,8,2))]
(roe_red_models <- rbind(roe_models, red_models))
## GPS-TCD GPS-TCD.1 GPS-CLC KDE-TCD
## (Intercept) roe 0.39+/-0.03NA 0.24+/-0.03NA 0.28+/-0.03NA
## timenight roe -0.27+/-0.03NA -0.19+/-0.03NA -0.09+/-0.02NA
## populationroe_25 roe 0.06+/-0.03NA 0.2+/-0.04NA 0.07+/-0.03NA
## populationroe_2 roe 0.44+/-0.03NA 0.61+/-0.04NA 0.5+/-0.04NA
## populationroe_15 roe 0+/-0.03(NS) 0.12+/-0.05NA 0+/-0.03(NS)
## populationroe_1 roe 0.5+/-0.04NA 0.46+/-0.05NA 0.44+/-0.04NA
## (Intercept)1 red 0.73+/-0.03NA 0.62+/-0.05NA 0.73+/-0.02NA
## timenight1 red -0.31+/-0.02NA -0.15+/-0.04NA -0.05+/-0.02NA
## populationred_3 red -0.04+/-0.04(NS) 0.01+/-0.08(NS) -0.1+/-0.04NA
## populationred_21 red 0.23+/-0.03NA 0.13+/-0.06NA 0.19+/-0.03NA
## populationred_17 red 0.17+/-0.03NA 0.26+/-0.05NA 0.09+/-0.03NA
## populationred_14 red 0.05+/-0.04(NS) -0.02+/-0.06(NS) -0.16+/-0.04NA
## KDE-CLC
## (Intercept) 0.16+/-0.03NA
## timenight -0.09+/-0.03NA
## populationroe_25 0.22+/-0.03NA
## populationroe_2 0.64+/-0.04NA
## populationroe_15 0.15+/-0.04NA
## populationroe_1 0.43+/-0.05NA
## (Intercept)1 0.66+/-0.04NA
## timenight1 -0.04+/-0.03(NS)
## populationred_3 -0.08+/-0.05(NS)
## populationred_21 0.14+/-0.05NA
## populationred_17 0.18+/-0.04NA
## populationred_14 -0.14+/-0.05NA
#write.csv(roe_red_models,'../results/models_roe_red_round.csv')
#### E4.4 Predictions ####
# Preparation
# data frames to define order of the population in the plot
# red
(red_pops_or <- data.frame(ord= c(1,2,3,4,5),
population=c('red_3','red_14','red_8','red_21','red_17'),
popname= c('N-Italy','N-Germany','SE-Germany','SE-Belgium','SW-Belgium'),
stringsAsFactors = FALSE))
## ord population popname
## 1 1 red_3 N-Italy
## 2 2 red_14 N-Germany
## 3 3 red_8 SE-Germany
## 4 4 red_21 SE-Belgium
## 5 5 red_17 SW-Belgium
# roe
(roe_pops_or <-data.frame(ord=1:5,
population=c('roe_8','roe_15','roe_25','roe_1','roe_2'),
popname=c('SW-France','SW-Germany','Switzerland','N-Italy','SE-Germany'),
stringsAsFactors = FALSE))
## ord population popname
## 1 1 roe_8 SW-France
## 2 2 roe_15 SW-Germany
## 3 3 roe_25 Switzerland
## 4 4 roe_1 N-Italy
## 5 5 roe_2 SE-Germany
# repeat the dataframe so to use them accordingly in the loop
roered_popsor <- list(roe_pops_or,roe_pops_or,roe_pops_or,roe_pops_or,red_pops_or,red_pops_or,red_pops_or,red_pops_or)
# predictions
datasets <- list(sub_roe_point,sub_roe_point,sub_roe_kernel,sub_roe_kernel,sub_red_point,sub_red_point,sub_red_kernel,sub_red_kernel)
res <- list(data.frame())
for(i in 1:length(models)){
# create new datset with all combinations of populations and day/night
newdata = unique(datasets[[i]][,c('population','time')])
# predict using the type response and include se.fit
pr <- predict(models[[i]], newdata, type="response",se.fit=TRUE)
# add the fit and se to the newdata dataframe
newdata$pred <- pr$fit
newdata$se.pred <- pr$se.fit
# get the family of the model
family <- family(models[[i]])
# perform linkinv to get the upper and lower confidence levels
newdata$lower <- family$linkinv(pr$fit - qnorm(0.95) * pr$se.fit)
newdata$upper <- family$linkinv(pr$fit + qnorm(0.95) * pr$se.fit)
# calculate the probabilities from the predictions
newdata$probs <- exp(newdata$pred)/(1+exp(newdata$pred)) #gives you probability that y=1 for each observation
# join the population table created above (roered_popsor)
newdata<- plyr::join(newdata, roered_popsor[[i]],type='left',by='population')
# sort based on the column ord (order) and time
res[[i]]<- newdata[order(newdata$ord,newdata$time),]
}
modnames <- c('roe_gps_tcd','roe_gps_clc','roe_kde_tcd','roe_kde_clc','red_gps_tcd','red_gps_clc','red_kde_tcd','red_kde_clc')
names(res) <- modnames
# increase in proportional use between night and day
dvn <- list()
nvd <- list()
for(i in 1:length(res)){
dvn[[i]] <- (subset(res[[i]], time == 'day')$pred ) / (subset(res[[i]],time=='night')$pred)
nvd[[i]] <- (subset(res[[i]], time == 'night')$pred ) / (subset(res[[i]],time=='day')$pred)
}
names(dvn) <- modnames
names(nvd) <- modnames
dvn_mean <- lapply(dvn, function(x) mean(x))
dvn_sd <- lapply(dvn, function(x) sd(x))
dvn_min <- lapply(dvn, function(x) min(x))
dvn_max <- lapply(dvn, function(x) max(x))
nvd_mean <- lapply(nvd, function(x) mean(x))
nvd_sd <- lapply(nvd, function(x) sd(x))
nvd_min <- lapply(nvd, function(x) min(x))
nvd_max <- lapply(nvd, function(x) max(x))
dvnnvd <- as.data.frame(unlist(dvn_mean))
colnames(dvnnvd) <- 'mean_dvn'
dvnnvd$sd_dvn <- as.vector(unlist(dvn_sd))
dvnnvd$min_dvn <- as.vector(unlist(dvn_min))
dvnnvd$max_dvn <- as.vector(unlist(dvn_max))
dvnnvd$mean_nvd <- 1 - as.vector(unlist(nvd_mean))
dvnnvd$sd_nvd <- as.vector(unlist(nvd_sd))
dvnnvd$min_nvd <- 1 - as.vector(unlist(nvd_min))
dvnnvd$max_nvd <- 1 - as.vector(unlist(nvd_max))
#write.csv(dvnnvd, '../results/difference_between_dayandnight.csv')
#### E4.5 Figure 4: Predictions Plot ####
res <- res[c(5:8,1:4)]
best <- best[c(5:8,1:4),]
plot_predictions()
# export(func=plot_predictions(),file="../results/FIGURE4",res=600, ratio=2, type='pdf')
# export(func=plot_predictions(),file="../results/FIGURE4",res=600, ratio=2, type='png')
# export(func=plot_predictions(),file="../results/FIGURE4",res=600, ratio=2, type='tif')