library(lfe) library(reshape2) library(htmltab) library(purrr) library(httr) library(plyr) library(readxl) library(zoo) library(RColorBrewer) library("texreg") darkcols <- c(brewer.pal(8, "Dark2"),brewer.pal(8, "Paired")) ###################################################################################################### ###################################################################################################### # Useful functions ###################################################################################################### simplelag <-function(x,by=NULL,mylag=1,outside=NA) { myend<-length(x)- mylag if (!is.null(by)) { lby<-c(replicate(mylag,""),as.character(by[1:myend])) y0<-c(replicate(mylag,outside),x[1:myend]) y<-ifelse(as.character(by)==lby,y0,outside) } else { y<-c(replicate(mylag,outside),x[1:myend]) } } retain<-function(x,event,outside=NA) { indices <- c(1,which(event==TRUE), length(x)+1) values <- c(outside,x[event==TRUE]) y<- rep(values, diff(indices)) } ###################################################################################################### ###################################################################################################### # SOURCES ###################################################################################################### setwd("D:\\Olivier\\ownCloud\\Data\\Covid19") #dc<-read.csv2("French_Dept/DC_Jan-Avr_2018-2020_det.csv") dc<-read.csv2("France_dc/DC_jan2018-avr2020_det.csv") dc_age<-read.csv2("France_dc/2020-04-30_deces_parage_regdept.csv") #Covid deaths per age and region dccov<-read.csv2("https://www.data.gouv.fr/fr/datasets/r/08c18e08-6780-452d-9b8c-ae244ad529b3") #Covid deaths per gender and departement dccov_2<-read.csv2("https://www.data.gouv.fr/fr/datasets/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7") #French regions and departments france<-read.csv("https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv",encoding="UTF-8") #DATA ON AGE AND INCOME PER DEPARTMENT dep_age<-read_excel("French_Dept/TCRD_021.xls",skip=3) str(dep_age) colnames(dep_age)<-c("code","dept","number","p_female","p_male","p_age0_24","p_age25_59", "p_age60_pl","p_age75_pl") dep_inc<-read_excel("French_Dept/TCRD_022.xls",skip=4) colnames(dep_inc)<-c("code","dept","number","p_imposed","median_inc","D1_inc","D9_inc") str(dep_inc) #SOURCES france<-read.csv("https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv",encoding="UTF-8") # Original source for French population. Downloaded on my web site to increase spead. # url<-"https://www.insee.fr/fr/statistiques/fichier/3545833/ensemble.xls" # GET(url, write_disk(tf <- tempfile(fileext = ".xls"))) # france_pop <- read_excel(tf, 2L,skip=7) # write.csv(france_pop,file="france_pop.csv",fileEncoding="UTF-8") france_pop<-read.csv("http://olivier.godechot.free.fr/hopfichiers/france_pop.csv",encoding="UTF-8") colnames(france_pop)<-c("X","code_region","nom_region","code_departement","nom_departement","nombre_arrondissements" ,"nombre_cantons","nombre_communes","population_municipale","population_totale") france_reg_pop<-as.data.frame.table(tapply(france_pop$population_totale,france_pop$code_region,sum)) colnames(france_reg_pop)<-c("code","population") france_dep_pop<-as.data.frame.table(tapply(france_pop$population_totale,france_pop$code_departement,sum)) colnames(france_dep_pop)<-c("code","population") france_dep_pop2<-as.data.frame(map2("https://fr.wikipedia.org/wiki/Liste_des_d%C3%A9partements_fran%C3%A7ais_class%C3%A9s_par_population_et_superficie",1, htmltab,rm_nodata_cols = F)) lasctol<-ncol(france_dep_pop2) colnames(france_dep_pop2)[c(2,lasctol)]<-c("code","area") str(france_dep_pop2) #Hospital Beds france_beds<-read.csv2("http://olivier.godechot.free.fr/hopfichiers/france_hospital_beds.csv", skip=6,header=TRUE,encoding="latin1") france_beds<-france_beds[,c(1,15,27)] colnames(france_beds)<-c("unit","beds2000","beds2012") ###################################################################################################### ###################################################################################################### # DATA STEPS ###################################################################################################### # dc data dc$AGE<-dc$ADEC-dc$ANAIS dc$un<-1 dc$date<-as.Date(paste(dc$ADEC,dc$MDEC,dc$JDEC,sep="-")) max(dc$date) dc$day<-as.numeric(dc$date) dc$day2<-ifelse(format(dc$date,"%Y")=="2018",dc$day+365*2, ifelse(format(dc$date,"%Y")=="2019",dc$day+365, dc$day)) dayconfinement<-as.Date("2020-03-17") daystart<-as.Date("2020-01-01") dayend<-max(dc$date) dc$confinement<-(dc$date>=dayconfinement)*1 dc$confinement_period<-(dc$day2>=as.integer(dayconfinement))*1 dc$day3<-dc$day2-as.numeric(daystart)+1 dc$week<-floor((dc$day2-as.numeric(daystart))/7)+1 lastweeknbdays<-as.numeric(dayend)-as.numeric(daystart)+1-floor((as.numeric(dayend)-as.numeric(daystart))/7)*7 dc$age_cat<-cut(dc$AGE,c(-1,29,49,64,74,84,130)) dc$weekday<-weekdays(dc$date) dc$un<-1 dc$cl_age90<-10*(floor(dc$AGE/10)+1)-1 dc$cl_age90<-ifelse(dc$cl_age90>89,90,dc$cl_age90) # dccov data dccov$date<-as.Date(dccov$jour) dccov$day<-as.numeric(dccov$date) dccov$week<-floor((dccov$day-as.numeric(daystart))/7)+1 dayend2<-max(dccov$day) dccov<-dccov[order(dccov$reg,dccov$cl_age90,dccov$day),] dccov$new_deaths<-dccov$dc-simplelag(dccov$dc,paste(dccov$reg,dccov$cl_age90,sep="#"),outside=0) dc<-dc[dc$day2<=as.integer(dayend2),] lastweeknbdays2<-as.numeric(dayend2)-as.numeric(daystart)+1-floor((as.numeric(dayend2)-as.numeric(daystart))/7)*7 #french_beds france_beds$code_dep<-ifelse(substr(france_beds$unit,1,1)=="D",substr(france_beds$unit,2,4),NA) france_beds$code_reg<-ifelse(substr(france_beds$unit,1,1)=="R",substr(france_beds$unit,2,4),NA) #France france$location<-france$maille_nom france$total_deaths<-france$deces france$date<-as.Date(france$date) france$day<-as.numeric(france$date) france$week<-floor((france$day-as.numeric(daystart))/7)+1 france_fr<-france[france$maille_code=="FRA",] france_fr<-france_fr[order(france_fr$day),] #French all france_fr<-france[france$maille_code %in% "FRA",] france_fr<-france_fr[order(france_fr$location,france_fr$date,-france_fr$total_deaths),] france_fr$sel<-ave((france_fr$location !=""),paste(france_fr$location,france_fr$date),FUN=function(x) cumsum(x)) france_fr<-france_fr[france_fr$sel %in% 1,] row.names(france_fr)<-NULL france_fr$cas_confirmes_approx<-ave(france_fr$cas_confirmes,france_fr$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_fr$death_approx<-ave(france_fr$total_deaths,france_fr$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_fr$hospitalises_approx<-ave(france_fr$hospitalises,france_fr$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_fr$gueris_approx<-ave(france_fr$gueris,france_fr$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_fr$reanimation_approx<-ave(france_fr$reanimation,france_fr$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_fr$mild_cases_approx<-france_fr$cas_confirmes_approx-france_fr$death_approx-france_fr$hospitalises_approx-france_fr$gueris_approx france_fr$hospitalises_evol<-france_fr$hospitalises_approx-simplelag(france_fr$hospitalises_approx,france_fr$location,outside=0) france_fr$reanimation_evol<-france_fr$reanimation_approx-simplelag(france_fr$reanimation_approx,france_fr$location,outside=0) france_fr$lcas_confirmes_approx<-simplelag(france_fr$cas_confirmes_approx,outside=0) france_fr$lmild_cases_approx<-simplelag(france_fr$mild_cases_approx,outside=0) france_fr$llocation<-simplelag(as.character(france_fr$location),outside = "") france_fr$continuity_approx0<-ifelse(as.character(france_fr$location) != france_fr$llocation, 0, ifelse(is.na(france_fr$lmild_cases_approx)==TRUE, france_fr$lcas_confirmes_approx-france_fr$hospitalises_approx+france_fr$gueris_approx+france_fr$death_approx, france_fr$lmild_cases_approx)) france_fr$continuity_approx<-retain(france_fr$continuity_approx0, (is.na(france_fr$cas_confirmes_approx)==TRUE & is.na(france_fr$lcas_confirmes_approx)==FALSE) | as.character(france_fr$location) != france_fr$llocation ,0) france_fr$total_cases<-ifelse(is.na(france_fr$cas_confirmes_approx) %in% FALSE,france_fr$cas_confirmes_approx, france_fr$hospitalises_approx+france_fr$gueris_approx+france_fr$death_approx+france_fr$continuity_approx) france_fr$new_deaths<-france_fr$total_deaths-simplelag(france_fr$total_deaths,france_fr$location,outside=0) france_fr$new_cases<-france_fr$total_cases-simplelag(france_fr$total_cases,france_fr$location,outside=0) france_fr$code<-substr(france_fr$maille_code,5,6) #france_fr<-merge(france_fr,france_fr_pop,by="code",all.x=TRUE) france_fr$cases_2_pop<-10000*france_fr$total_cases/sum(france_pop$population_totale) france_fr$deaths_2_pop<-100000*france_fr$death_approx/sum(france_pop$population_totale) france_fr<-france_fr[order(france_fr$location,france_fr$date,-france_fr$total_deaths),] #French departments france_dep<-france[france$granularite %in% "departement",] france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep$sel<-ave((france_dep$location !=""),paste(france_dep$location,france_dep$date),FUN=function(x) cumsum(x)) france_dep<-france_dep[france_dep$sel %in% 1,] france_dep$cas_confirmes_approx<-ave(france_dep$cas_confirmes,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$death_approx<-ave(france_dep$total_deaths,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$hospitalises_approx<-ave(france_dep$hospitalises,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$gueris_approx<-ave(france_dep$gueris,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$reanimation_approx<-ave(france_dep$reanimation,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$hospitalises_evol<-france_dep$hospitalises_approx-simplelag(france_dep$hospitalises_approx,france_dep$location,outside=0) france_dep$reanimation_evol<-france_dep$reanimation_approx-simplelag(france_dep$reanimation_approx,france_dep$location,outside=0) france_dep$mild_cases_approx<-france_dep$cas_confirmes_approx-france_dep$death_approx-france_dep$hospitalises_approx-france_dep$gueris_approx france_dep$lcas_confirmes_approx<-simplelag(france_dep$cas_confirmes_approx,outside=0) france_dep$lmild_cases_approx<-simplelag(france_dep$mild_cases_approx,outside=0) france_dep$llocation<-simplelag(as.character(france_dep$location),outside = "") france_dep$continuity_approx0<-ifelse(as.character(france_dep$location) != france_dep$llocation, 0, ifelse(is.na(france_dep$lmild_cases_approx)==TRUE, france_dep$lcas_confirmes_approx-france_dep$hospitalises_approx+france_dep$gueris_approx+france_dep$death_approx, france_dep$lmild_cases_approx)) france_dep$continuity_approx<-retain(france_dep$continuity_approx0, (is.na(france_dep$cas_confirmes_approx)==TRUE & is.na(france_dep$lcas_confirmes_approx)==FALSE) | as.character(france_dep$location) != france_dep$llocation ,0) france_dep$total_cases<-ifelse(is.na(france_dep$cas_confirmes_approx) %in% FALSE,france_dep$cas_confirmes_approx, france_dep$hospitalises_approx+france_dep$gueris_approx+france_dep$death_approx+france_dep$continuity_approx) france_dep$new_deaths<-france_dep$total_deaths-simplelag(france_dep$total_deaths,france_dep$location,outside=0) france_dep$new_cases<-france_dep$total_cases-simplelag(france_dep$total_cases,france_dep$location,outside=0) france_dep$code<-substr(france_dep$maille_code,5,7) france_dep<-merge(france_dep,france_dep_pop,by="code",all.x=TRUE) france_dep_pop2$code<-ifelse(nchar(france_dep_pop2$code)==1,paste("0",france_dep_pop2$code,sep=""),france_dep_pop2$code) france_dep<-merge(france_dep,france_dep_pop2[,c("code","area")],by="code",all.x=TRUE) france_dep$area<-iconv(france_dep$area,"UTF-8","latin1") france_dep$area2<-gsub("\u00A0","",france_dep$area) france_dep$density<-france_dep$population/as.numeric(france_dep$area2) france_dep$cases_2_pop<-10000*france_dep$total_cases/france_dep$population france_dep$deaths_2_pop<-100000*france_dep$death_approx/france_dep$population france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep<-merge(france_dep,france_beds,by.x="code",by.y="code_dep",all.x=TRUE) france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep$nb_beds<-france_dep$beds2012*france_dep$population/100000 france_dep$hospitalized_per_beds<-france_dep$hospitalises/france_dep$nb_beds france_dep$reanimation_per_beds<-france_dep$reanimation/france_dep$nb_beds france_dep$calendar_day<-as.numeric(france_dep$date)-min(as.numeric(france_dep$date))+1 france_dep$confinement_day<-pmax(as.numeric(france_dep$date)-as.numeric(as.Date("2020-03-17"))+1,0) france_dep$threshold_day<-ave(ifelse(france_dep$death_approx>10,as.numeric(france_dep$date),999999999),france_dep$location,FUN=function(x) min(x,na.rm=TRUE)) france_dep$day_from_threshold<-ifelse(france_dep$threshold_day<999999999,as.numeric(france_dep$date)-france_dep$threshold_day+1,NA) france_dep$day_from_threshold<-ifelse(france_dep$day_from_threshold<=0,NA,france_dep$day_from_threshold) france_dep$time<-factor(france_dep$day_from_threshold) france_dep$nb_beds_2012<-france_dep$nb_beds france_dep$beds_2_pop_2012<-france_dep$beds2012 france_dep$beds_decline<-france_dep$beds2000-france_dep$beds2012 france_dep<-merge(france_dep,dep_age,by="code",all.x=TRUE) france_dep<-merge(france_dep,dep_inc,by="code",all.x=TRUE) france_dep<-france_dep[order(france_dep$location,france_dep$date),] france_dep$total_cases_approx<-ave(france_dep$total_cases,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$total_deaths_approx<-ave(france_dep$total_deaths,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$new_cases_approx<-france_dep$total_cases_approx-simplelag(france_dep$total_cases_approx,france_dep$location,outside=0) france_dep$new_deaths_approx<-france_dep$total_deaths_approx-simplelag(france_dep$total_deaths_approx,france_dep$location,outside=0) #French regions france_reg<-france[france$granularite %in% "region",] france_reg<-france_reg[order(france_reg$location,france_reg$date,-france_reg$total_deaths),] france_reg$sel<-ave((france_reg$location !=""),paste(france_reg$location,france_reg$date),FUN=function(x) cumsum(x)) france_reg<-france_reg[france_reg$sel %in% 1,] row.names(france_reg)<-NULL france_reg$cas_confirmes_approx<-ave(france_reg$cas_confirmes,france_reg$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_reg$death_approx<-ave(france_reg$total_deaths,france_reg$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_reg$hospitalises_approx<-ave(france_reg$hospitalises,france_reg$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_reg$gueris_approx<-ave(france_reg$gueris,france_reg$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_reg$reanimation_approx<-ave(france_reg$reanimation,france_reg$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_reg$mild_cases_approx<-france_reg$cas_confirmes_approx-france_reg$death_approx-france_reg$hospitalises_approx-france_reg$gueris_approx france_reg$hospitalises_evol<-france_reg$hospitalises_approx-simplelag(france_reg$hospitalises_approx,france_reg$location,outside=0) france_reg$reanimation_evol<-france_reg$reanimation_approx-simplelag(france_reg$reanimation_approx,france_reg$location,outside=0) france_reg$lcas_confirmes_approx<-simplelag(france_reg$cas_confirmes_approx,outside=0) france_reg$lmild_cases_approx<-simplelag(france_reg$mild_cases_approx,outside=0) france_reg$llocation<-simplelag(as.character(france_reg$location),outside = "") france_reg$continuity_approx0<-ifelse(as.character(france_reg$location) != france_reg$llocation, 0, ifelse(is.na(france_reg$lmild_cases_approx)==TRUE, france_reg$lcas_confirmes_approx-france_reg$hospitalises_approx+france_reg$gueris_approx+france_reg$death_approx, france_reg$lmild_cases_approx)) france_reg$continuity_approx<-retain(france_reg$continuity_approx0, (is.na(france_reg$cas_confirmes_approx)==TRUE & is.na(france_reg$lcas_confirmes_approx)==FALSE) | as.character(france_reg$location) != france_reg$llocation ,0) france_reg$total_cases<-ifelse(is.na(france_reg$cas_confirmes_approx) %in% FALSE,france_reg$cas_confirmes_approx, france_reg$hospitalises_approx+france_reg$gueris_approx+france_reg$death_approx+france_reg$continuity_approx) france_reg$new_deaths<-france_reg$total_deaths-simplelag(france_reg$total_deaths,france_reg$location,outside=0) france_reg$new_cases<-france_reg$total_cases-simplelag(france_reg$total_cases,france_reg$location,outside=0) france_reg$code<-substr(france_reg$maille_code,5,6) france_reg<-merge(france_reg,france_reg_pop,by="code",all.x=TRUE) france_reg$cases_2_pop<-10000*france_reg$total_cases/france_reg$population france_reg$deaths_2_pop<-100000*france_reg$death_approx/france_reg$population france_reg<-france_reg[order(france_reg$location,france_reg$date,-france_reg$total_deaths),] #French departments france_dep<-france[france$granularite %in% "departement",] france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep$sel<-ave((france_dep$location !=""),paste(france_dep$location,france_dep$date),FUN=function(x) cumsum(x)) france_dep<-france_dep[france_dep$sel %in% 1,] france_dep$cas_confirmes_approx<-ave(france_dep$cas_confirmes,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$death_approx<-ave(france_dep$total_deaths,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$hospitalises_approx<-ave(france_dep$hospitalises,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$gueris_approx<-ave(france_dep$gueris,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$reanimation_approx<-ave(france_dep$reanimation,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$hospitalises_evol<-france_dep$hospitalises_approx-simplelag(france_dep$hospitalises_approx,france_dep$location,outside=0) france_dep$reanimation_evol<-france_dep$reanimation_approx-simplelag(france_dep$reanimation_approx,france_dep$location,outside=0) france_dep$mild_cases_approx<-france_dep$cas_confirmes_approx-france_dep$death_approx-france_dep$hospitalises_approx-france_dep$gueris_approx france_dep$lcas_confirmes_approx<-simplelag(france_dep$cas_confirmes_approx,outside=0) france_dep$lmild_cases_approx<-simplelag(france_dep$mild_cases_approx,outside=0) france_dep$llocation<-simplelag(as.character(france_dep$location),outside = "") france_dep$continuity_approx0<-ifelse(as.character(france_dep$location) != france_dep$llocation, 0, ifelse(is.na(france_dep$lmild_cases_approx)==TRUE, france_dep$lcas_confirmes_approx-france_dep$hospitalises_approx+france_dep$gueris_approx+france_dep$death_approx, france_dep$lmild_cases_approx)) france_dep$continuity_approx<-retain(france_dep$continuity_approx0, (is.na(france_dep$cas_confirmes_approx)==TRUE & is.na(france_dep$lcas_confirmes_approx)==FALSE) | as.character(france_dep$location) != france_dep$llocation ,0) france_dep$total_cases<-ifelse(is.na(france_dep$cas_confirmes_approx) %in% FALSE,france_dep$cas_confirmes_approx, france_dep$hospitalises_approx+france_dep$gueris_approx+france_dep$death_approx+france_dep$continuity_approx) france_dep$new_deaths<-france_dep$total_deaths-simplelag(france_dep$total_deaths,france_dep$location,outside=0) france_dep$new_cases<-france_dep$total_cases-simplelag(france_dep$total_cases,france_dep$location,outside=0) france_dep$code<-substr(france_dep$maille_code,5,7) france_dep<-merge(france_dep,france_dep_pop,by="code",all.x=TRUE) france_dep_pop2$code<-ifelse(nchar(france_dep_pop2$code)==1,paste("0",france_dep_pop2$code,sep=""),france_dep_pop2$code) france_dep<-merge(france_dep,france_dep_pop2[,c("code","area")],by="code",all.x=TRUE) france_dep$area<-iconv(france_dep$area,"UTF-8","latin1") france_dep$area2<-gsub("\u00A0","",france_dep$area) france_dep$density<-france_dep$population/as.numeric(france_dep$area2) france_dep$cases_2_pop<-10000*france_dep$total_cases/france_dep$population france_dep$deaths_2_pop<-100000*france_dep$death_approx/france_dep$population france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep<-merge(france_dep,france_beds,by.x="code",by.y="code_dep",all.x=TRUE) france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep<-france_dep[order(france_dep$location,france_dep$date,-france_dep$total_deaths),] france_dep$nb_beds<-france_dep$beds2012*france_dep$population/100000 france_dep$hospitalized_per_beds<-france_dep$hospitalises/france_dep$nb_beds france_dep$reanimation_per_beds<-france_dep$reanimation/france_dep$nb_beds france_dep$calendar_day<-as.numeric(france_dep$date)-min(as.numeric(france_dep$date))+1 france_dep$confinement_day<-pmax(as.numeric(france_dep$date)-as.numeric(as.Date("2020-03-17"))+1,0) france_dep$threshold_day<-ave(ifelse(france_dep$death_approx>10,as.numeric(france_dep$date),999999999),france_dep$location,FUN=function(x) min(x,na.rm=TRUE)) france_dep$day_from_threshold<-ifelse(france_dep$threshold_day<999999999,as.numeric(france_dep$date)-france_dep$threshold_day+1,NA) france_dep$day_from_threshold<-ifelse(france_dep$day_from_threshold<=0,NA,france_dep$day_from_threshold) france_dep$time<-factor(france_dep$day_from_threshold) france_dep$nb_beds_2012<-france_dep$nb_beds france_dep$beds_2_pop_2012<-france_dep$beds2012 france_dep$beds_decline<-france_dep$beds2000-france_dep$beds2012 france_dep<-merge(france_dep,dep_age,by="code",all.x=TRUE) france_dep<-merge(france_dep,dep_inc,by="code",all.x=TRUE) france_dep<-france_dep[order(france_dep$location,france_dep$date),] france_dep$total_cases_approx<-ave(france_dep$total_cases,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$total_deaths_approx<-ave(france_dep$total_deaths,france_dep$location,FUN=function(x) na.approx(x,na.rm=FALSE)) france_dep$new_cases_approx<-france_dep$total_cases_approx-simplelag(france_dep$total_cases_approx,france_dep$location,outside=0) france_dep$new_deaths_approx<-france_dep$total_deaths_approx-simplelag(france_dep$total_deaths_approx,france_dep$location,outside=0) france_reg$new_deaths<-france_fr$total_deaths-simplelag(france_reg$total_deaths,france_reg$location,outside=0) tapply(france_fr$new_deaths,france_fr$week,sum,na.rm=TRUE)[1:9] sum(france_fr$new_deaths,na.rm=TRUE) str(france_fr) nursing_home_deaths<-france_fr$deces_ehpad[france_fr$date=="2020-04-14"] france_fr$deces[france_fr$date=="2020-04-14"] ############################################### # Graphs 2020 over-mortality per age ############################################### mycol<-c("gray60","gray70","black","red","darkorange1","darkviolet","deepskyblue") mypch<-c(2,6,17,16,20,15,26) mylty<-c(2,2,1,1,1,1,2) results<-NULL gdata<-function(agecat,data=dc) { # data<-dc2 # agecat<-0 agecat2<-paste("Age: ",floor(agecat/10)*10,"-",agecat,sep="") if(agecat==0) {agecat2<-"Age: all"} if(agecat==90) {agecat2<-"Age: 90 +"} mydc<-data w<-as.data.frame.matrix(table(mydc$week[mydc$cl_age90==agecat],mydc$ADEC[mydc$cl_age90==agecat])) colnames(w)<-c("y2018","y2019","y2020") w$week<-as.numeric(rownames(w)) w$y2020[w$week==16]<-NA #w$y2020[w$week==16]*7/lastweeknbdays w$y2020[w$week==17]<-NA w$y2020[w$week==18]<-NA w$y2019[w$week==18]<-w$y2019[w$week==18]*7/lastweeknbdays2 w$y2018[w$week==18]<-w$y2018[w$week==18]*7/lastweeknbdays2 w$y2018_19<-0.5*(w$y2018+w$y2019) wcov<-data.frame(tapply(dccov$new_deaths[dccov$cl_age90==agecat],dccov$week[dccov$cl_age90==agecat],sum)) colnames(wcov)<-"covid_hospital_deaths" wcov$week<-as.numeric(rownames(wcov)) w<-merge(w,wcov,by="week",all.x=TRUE) w<-w[order(w$week),] correction_week12<-sum(tapply(france_fr$new_deaths,france_fr$week,sum,na.rm=TRUE)[9])/sum(tapply(france_fr$new_deaths,france_fr$week,sum,na.rm=TRUE)[1:9]) correction_week11<-sum(tapply(france_fr$new_deaths,france_fr$week,sum,na.rm=TRUE)[8])/sum(tapply(france_fr$new_deaths,france_fr$week,sum,na.rm=TRUE)[1:9]) correction_week10<-1-correction_week11-correction_week12 w$covid_hospital_deaths[w$week==9]<-0 w$covid_hospital_deaths[w$week==10]<-correction_week10*w$covid_hospital_deaths[w$week==12] w$covid_hospital_deaths[w$week==11]<-correction_week11*w$covid_hospital_deaths[w$week==12] w$covid_hospital_deaths[w$week==12]<-correction_week12*w$covid_hospital_deaths[w$week==12] w$y2020_without_cov<-w$y2020-w$covid_hospital_deaths w$covid_hospital_deaths_scaled<-retain(w$y2020,w$week==9)+w$covid_hospital_deaths w rightmax<-max( c(max(w$covid_hospital_deaths_scaled[w$week>9],na.rm=TRUE), max(w$y2020_without_cov[w$week>9],na.rm=TRUE), max(w$y2020[w$week>9],na.rm=TRUE))-w$covid_hospital_deaths_scaled[w$week==9]) if (rightmax>10) { roundUpNice <- function(x, nice=c(1,2,4,5,6,8,10)) { if(length(x) != 1) stop("'x' must be of length 1") 10^floor(log10(x)) * nice[[which(x <= 10^floor(log10(x)) * nice)[[1]]]] } rightmax2<-roundUpNice(rightmax) ticks<-c(0,rightmax2/4,2*rightmax2/4,3*rightmax2/4,4*rightmax2/4,5*rightmax2/4) } else {ticks<-c(0,rightmax) } posticks<-ticks+w$covid_hospital_deaths_scaled[w$week==9] g<-w[,c("y2018","y2019","y2018_19","y2020","y2020_without_cov","covid_hospital_deaths_scaled")] matplot(g,pch=mypch, ylab=list("Number of deaths per week",cex=0.8),xlab="",col=mycol,xaxt="n",cex.axis=0.8,las=2) min_a_2 <- pmin(w$y2018_19[w$week>8 & w$week<16]#,w$y2020_without_cov[w$week>9 & w$week<16] ) max_a_2 <- pmax(#w$y2018_19[w$week>9 & w$week<16], w$y2020_without_cov[w$week>8 & w$week<16]) polygon(c(w$week[w$week>8 & w$week<16], rev(w$week[w$week>8 & w$week<16])), c(max_a_2 ,rev(min_a_2)), col = adjustcolor("goldenrod1",alpha.f=0.5),border=NA) min_a <- pmin(w$y2020[w$week>8 & w$week<16],w$y2020_without_cov[w$week>8 & w$week<16]) max_a <- pmax(w$y2020[w$week>8 & w$week<16],w$y2020_without_cov[w$week>8 & w$week<16]) polygon(c(w$week[w$week>8 & w$week<16], rev(w$week[w$week>8 & w$week<16])), c(max_a ,rev(min_a)), col = adjustcolor("plum2",alpha.f=0.5),border=NA) title(agecat2) matlines(g,col=mycol,lty=mylty,pch=mypch) matpoints(g,col=mycol,pch=mypch) abline(v=9,lty=2,col="darkviolet") abline(h=w$y2020[w$week==9],lty=2,col="darkviolet") axis(1, at=c(1:18),c("Jan 1-7","Jan 8-14","Jan 15-21","Jan 22-28","Jan 29-Feb 4", "Feb 5-11","Feb 12-18","Feb 19-25","Feb 26-Mar 3","Mar 4-10", "Mar 11-17","Mar 18-24","Mar 25-31","Apr 1-7","Apr 8-14", "Apr 15-21","Apr 22-28","Apr 29-May 3"),cex.axis=0.8,las=2) axis(4, at=posticks, labels=ticks, col.ticks = "purple",col="purple",col.axis="purple",las=1,cex.axis=0.8) abline(v=11.5,lty=2,col="deepskyblue") m<-as.data.frame.table(cbind(tapply(w$y2018[w$week<16],w$week[w$week<16]>9,sum), tapply(w$y2019[w$week<16],w$week[w$week<16]>9,sum), tapply(w$y2020[w$week<16],w$week[w$week<16]>9,sum), tapply(w$y2020_without_cov[w$week<16],w$week[w$week<16]>9,sum))) colnames(m)<-c("period","year","deaths") levels(m$year)<-c("y2018","y2019","y2020","y2020_without_covid") m$deaths[7]<-m$deaths[5] mod1<-glm(deaths~I(period=="TRUE")*I(year=="y2020"), data=m[1:6,],family="poisson") summary(mod1) c1<-mod1$coefficients[4] se1<-summary(mod1)$coefficients[, 2][4] p1<-summary(mod1)$coefficients[, 4][4] r1<-exp(mod1$coefficients[4])-1 r1l<-exp(mod1$coefficients[4]-1.96*se1)-1 r1u<-exp(mod1$coefficients[4]+1.96*se1)-1 d1<-m$deaths[6]-m$deaths[6]/(1+r1) d1l<-m$deaths[6]-m$deaths[6]/(1+r1l) d1u<-m$deaths[6]-m$deaths[6]/(1+r1u) mod2<-glm(deaths~I(period=="TRUE")*I(year=="y2020_without_covid"), data=m[c(1:4,7:8),],family="poisson") summary(mod2) c2<-mod2$coefficients[4] se2<-summary(mod2)$coefficients[,2][4] p2<-summary(mod2)$coefficients[,4][4] r2<-exp(mod2$coefficients[4])-1 r2l<-exp(mod2$coefficients[4]-1.96*se2)-1 r2u<-exp(mod2$coefficients[4]+1.96*se2)-1 d2<-m$deaths[6]-m$deaths[6]/(1+r2) d2l<-m$deaths[6]-m$deaths[6]/(1+r2l) d2u<-m$deaths[6]-m$deaths[6]/(1+r2u) mod3<-glm(deaths~I(year=="y2020"), data=m[c(2,4,6),],family="poisson") summary(mod3) c3<-mod3$coefficients[2] se3<-summary(mod3)$coefficients[,2][2] p3<-summary(mod3)$coefficients[,4][2] r3<-exp(mod3$coefficients[2])-1 r3l<-exp(mod3$coefficients[2]-1.96*se3)-1 r3u<-exp(mod3$coefficients[2]+1.96*se3)-1 d3<-m$deaths[6]-m$deaths[6]/(1+r3) d3l<-m$deaths[6]-m$deaths[6]/(1+r3l) d3u<-m$deaths[6]-m$deaths[6]/(1+r3u) d18_19<-0.5*(m$deaths[2]+m$deaths[4]) d20<-m$deaths[6] dcovhos<-m$deaths[6]-m$deaths[8] mod4<-glm(deaths~I(year=="y2020_without_covid"), data=m[c(2,4,8),],family="poisson") summary(mod4) c4<-mod4$coefficients[2] se4<-summary(mod4)$coefficients[,2][2] p4<-summary(mod4)$coefficients[,4][2] r4<-exp(mod4$coefficients[2])-1 r4l<-exp(mod4$coefficients[2]-1.96*se4)-1 r4u<-exp(mod4$coefficients[2]+1.96*se4)-1 d4<-m$deaths[6]-m$deaths[6]/(1+r4) d4l<-m$deaths[6]-m$deaths[6]/(1+r4l) d4u<-m$deaths[6]-m$deaths[6]/(1+r4u) d18_19<-0.5*(m$deaths[2]+m$deaths[4]) d20<-m$deaths[6] dcovhos<-m$deaths[6]-m$deaths[8] mod5<-glm(deaths~I(year=="y2020"), data=m[c(6,8),],family="poisson") summary(mod5) c5<-mod5$coefficients[2] se5<-summary(mod5)$coefficients[,2][2] p5<-summary(mod5)$coefficients[,4][2] r5<-exp(mod5$coefficients[2])-1 r5l<-exp(mod5$coefficients[2]-1.96*se5)-1 r5u<-exp(mod5$coefficients[2]+1.96*se5)-1 d5<-m$deaths[6]-m$deaths[6]/(1+r5) d5l<-m$deaths[6]-m$deaths[6]/(1+r5l) d5u<-m$deaths[6]-m$deaths[6]/(1+r5u) d18_19<-0.5*(m$deaths[2]+m$deaths[4]) d20<-m$deaths[6] dcovhos<-m$deaths[6]-m$deaths[8] results<-rbind.fill(results,data.frame(agecat,agecat2,d18_19,d20,dcovhos,c1,se1,p1,r1,r1l,r1u,d1,d1l,d1u, c2,se2,p2,r2,r2l,r2u,d2,d2l,d2u, c3,se3,p3,r3,r3l,r3u,d3,d3l,d3u, c4,se4,p4,r4,r4l,r4u,d4,d4l,d4u, c5,se5,p5,r5,r5l,r5u,d5,d5l,d5u)) row.names(results)<-NULL return(results) } dev.off() pdf(file="France_Mortality.pdf",paper="a4",width=8,height=11,family="Bookman") results<-NULL #par(oma=c(0,0,0,1)) par(mar=c(5.1, 4.1, 2.1, 3.1)) par(mfrow=c(4,2)) #Legend plot(0:30, type = "n", xaxt="n", yaxt="n", bty="n", xlab = "", ylab = "") title("Covid-19 epidemic and mortality\n in France by age category") legend("bottomleft",title="Legend",c("Total deaths 2018","Total deaths 2019","Average deaths 2018 and 2019", "Total deaths 2020 (provisional)", "Total deaths 2020 without Covid-19 hospital deaths", "Covid-19 hospital deaths (right scale)", "Lockdown"), pch=mypch,lty=mylty, col=mycol,bty = "n", text.col=mycol,cex=0.9,title.col = "black") dc2<-dc dc2$cl_age90<-0 results<-gdata(0,data=dc2) results<-gdata(90) results<-gdata(89) results<-gdata(79) results<-gdata(69) results<-gdata(59) results<-gdata(49) ################################################# # Page 2 par(mfrow=c(4,2)) results<-gdata(39) results<-gdata(29) results<-gdata(19) results<-gdata(9) results$agecat3<-gsub("Age: ","",results$agecat2) #dev.off() bar1<-barplot(t(cbind(c(results$d3[1],results$d1[1]), c(results$d5[1],results$d5[1]), c(results$d4[1],results$d2[1]), c(nursing_home_deaths,nursing_home_deaths))), beside=TRUE, names.arg=c("Simple difference","Difference-in-difference"), col=c(adjustcolor("red",alpha.f=0.5), adjustcolor("darkviolet",alpha.f=0.5), adjustcolor("darkorange",alpha.f=0.5), adjustcolor("darkgreen",alpha.f=0.5)), ylim=c(0,max(1.2*results$d1u[1])), las=1,xlab=list("Age: all",cex=0.8), ylab=list("Number of deaths in excess",cex=0.8),cex.axis =0.8,cex.names = 0.8) abline(h=0) title("Excess deaths between March 4th and April 14th",cex.main=1) segments(bar1, t(cbind(c(results$d3l[1],results$d1l[1]),c(NA,NA),c(results$d4l[1],results$d2l[1]),c(NA,NA))),bar1, t(cbind(c(results$d3u[1],results$d1u[1]),c(NA,NA),c(results$d4u[1],results$d2u[1]),c(NA,NA))), lwd = 0.8,col="gray30") arrows(bar1, t(cbind(c(results$d3l[1],results$d1l[1]),c(NA,NA),c(results$d4l[1],results$d2l[1]),c(NA,NA))),bar1, t(cbind(c(results$d3u[1],results$d1u[1]),c(NA,NA),c(results$d4u[1],results$d2u[1]),c(NA,NA))), lwd = 0.8, angle = 90, code = 3, length = 0.05,col="gray30") text(c(1.7,3.7,6.7,8.7),c(results$d3[1],results$d4[1],results$d1[1],results$d2[1]), paste("+",100*round(c(results$r3[1],results$r4[1],results$r1[1],results$r2[1]),2),"%") ,pos=1,cex=0.8) legend("topleft", c("Excess deaths", "Covid-19 hospital deaths", "Excess deaths without\n Covid-19 hospital deaths", "Covid-19 nursing home deaths"), fill=c(adjustcolor("red",alpha.f=0.5), c(adjustcolor("darkviolet",alpha.f=0.5)), adjustcolor("darkorange",alpha.f=0.5), adjustcolor("darkgreen",alpha.f=0.5)), ncol=1, cex=0.7,bty="n", border=1,bg=NA) bar1<-barplot(t(cbind(results$d1[-1],results$d5[-1],results$d2[-1])),beside=TRUE,names.arg=results$agecat3[-1], col=c(adjustcolor("red",alpha.f=0.5), adjustcolor("darkviolet",alpha.f=0.5), adjustcolor("darkorange",alpha.f=0.5)), ylim=c(1.01*min(results$d2l[-1]),max(1.01*results$d1u[-1])),las=2,xlab=list("Age",cex=0.8), ylab=list("Number of deaths in excess (diff-in-diff)",cex=0.8),cex.axis =0.8,cex.names = 0.8) abline(h=0) title("Excess deaths between March 4th and April 14th",cex=0.7) segments(bar1, t(cbind(results$d1l[-1],rep(NA,nrow(results)-1),results$d2l[-1])),bar1, t(cbind(results$d1u[-1],rep(NA,nrow(results)-1),results$d2u[-1])), lwd = 0.8,col="gray50") arrows(bar1, t(cbind(results$d1l[-1],rep(NA,nrow(results)-1),results$d2l[-1])),bar1, t(cbind(results$d1u[-1],rep(NA,nrow(results)-1),results$d2u[-1])), lwd = 0.8,col="gray50", angle = 90, code = 3, length = 0.01) legend("topright", c("Excess deaths","Covid-19 hospital deaths","Excess deaths without\n Covid-19 hospital deaths"), fill=c(adjustcolor("red",alpha.f=0.5), adjustcolor("darkviolet",alpha.f=0.5), adjustcolor("darkorange",alpha.f=0.5)), ncol=1, cex=0.8,bty="n", border=1,bg=NA) bar1<-barplot(t(cbind(results$r1[-1],results$r1[-1]-results$r2[-1],results$r2[-1])),beside=TRUE,names.arg=results$agecat3[-1], col=c(adjustcolor("red",alpha.f=0.5), adjustcolor("darkviolet",alpha.f=0.5), adjustcolor("darkorange",alpha.f=0.5)), ylim=c(1.3*min(results$r2l[-1]),max(1.3*results$r1u[-1])),las=2,xlab=list("Age",cex=0.8), ylab=list("Increase in deaths (diff-in-diff)",cex=0.8),cex.axis =0.8,cex.names = 0.8,yaxt="n") abline(h=0) axis(2,las=2,at=pretty(t(cbind(results$r1[-1],results$r2[-1]))), lab=paste(pretty(t(cbind(results$r1[-1],results$r2[-1])) * 100),"%"),cex.axis=0.8) title("Increase in deaths between March 4th and April 14th",cex.main=0.9) segments(bar1, t(cbind(results$r1l[-1],rep(NA,nrow(results)-1),results$r2l[-1])),bar1, t(cbind(results$r1u[-1],rep(NA,nrow(results)-1),results$r2u[-1])), lwd = 0.8,col="gray50") arrows(bar1, t(cbind(results$r1l[-1],rep(NA,nrow(results)-1),results$r2l[-1])),bar1, t(cbind(results$r1u[-1],rep(NA,nrow(results)-1),results$r2u[-1])), lwd = 0.8,col="gray50", angle = 90, code = 3, length = 0.01) legend("topright", c("Increase in deaths","Increase due to Covid-19 hospital deaths", "Increase in deaths without\n Covid-19 hospital deaths"), fill=c(adjustcolor("red",alpha.f=0.5), adjustcolor("darkviolet",alpha.f=0.5), adjustcolor("darkorange",alpha.f=0.5)), ncol=1, cex=0.8,bty="n", border=1,bg=NA) plot(0:30, type = "n", xaxt="n", yaxt="n", bty="n", xlab = "", ylab = "") title("Sources") text(1, 23, "Deaths in France",pos=4,cex=1) text(1, 21, "Insee",pos=4,cex=0.8) text(1, 19, "https://www.insee.fr/fr/information/4470857",pos=4,cex=0.6) text(1, 17, "File: Fichier individuel comportant des informations sur chaque décès",pos=4,cex=0.6) text(1, 13, "Covid deaths per age and region",pos=4,cex=1) text(1, 11, "Santé Public France - data.gouv.fr",pos=4,cex=0.8) text(1, 9, "https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/",pos=4,cex=0.6) text(1, 7, "File: donnees-hospitalieres-etablissements-covid19-2020-##-##.csv",pos=4,cex=0.6) text(1, 3, "R Script",pos=4,cex=1) text(1, 1, "http://olivier.godechot.free.fr/hopfichiers/French_Overmortality.R",pos=4,cex=0.6) dev.off()