UrbanGaiaReport.Rmd

---
title: "UrbanGaia KPI paper"
author:
  - name: Raisa Carmen 
    email: raisa.carmen@inbo.be
  - name: Sander Jacobs
    email: sander.jacobs@inbo.be
output:
  bookdown::html_document2: 
    fig_caption: yes
  bookdown::pdf_book:
    includes:
      in_header: preamble.tex
    base_format: INBOmd::inbo_rapport
    floatbarrier: "section"
    lang: english
    lof: FALSE
    lot: FALSE
    style: INBO
    keep_tex: yes
  bookdown::epub_book:
    stylesheet: css/inbo_rapport.css
    template: css/inbo.epub3
reportnr: ''
site: bookdown::bookdown_site
subtitle: ''
link-citations: yes
---


```{r setup, include = FALSE, silent=TRUE}
# Packages used throughout the report. The load order is kept as-is because
# later packages mask functions of earlier ones.
library(cowplot)
library(dplyr)
library(ggplot2)
library(INBOtheme)
library(kableExtra)
library(knitr)
# moonBook and webr are only needed by the PieDonut() calls that are currently
# commented out in the plotting chunks; require() is kept on purpose so that a
# missing installation only warns instead of aborting the report.
require(moonBook)
library(readxl)
library(tidyverse)
require(webr)
library(yaml)

# Default options for all following chunks: hide the code, show warnings,
# errors and messages in the output, and use 150 mm x 100 mm figures at 300 dpi.
opts_chunk$set(
  echo = FALSE,
  eval = TRUE,
  cache = FALSE,
  dpi = 300,
  fig.width = 150 / 25.4,
  fig.height = 100 / 25.4,
  warning = TRUE,
  error = TRUE,
  message = TRUE
)

# Default ggplot2 theme (interactive sessions and HTML output).
theme_set(
  theme_inbo(
    base_family = "Calibri", # "Flanders Art Sans",
    base_size = 12
  )
)

# The pandoc target is NULL when the document is not rendered through
# rmarkdown. Comparing NULL with "html" gives logical(0), which makes `if`
# fail, so the NULL case is excluded explicitly and keeps the defaults above.
pandoc_to <- opts_knit$get("rmarkdown.pandoc.to")
if (!interactive() && !is.null(pandoc_to) && pandoc_to != "html") {
  # Non-HTML output: cairo_pdf graphics device and a smaller base font size.
  opts_chunk$set(dev = "cairo_pdf")
  theme_set(
    theme_inbo(
      base_family = "Calibri", # "Flanders Art Sans",
      base_size = 8
    )
  )
  #knitr::opts_knit$set(root.dir= normalizePath('..'))
}
options(tinytex.verbose = TRUE)
```


# Introduction

```{r read_data, include=FALSE, silent=TRUE}
library(readr)

# Workbook with one sheet of indicator scores per case-study city; it is read
# from the `data` folder below the working directory.
filename <- "Indicators.xlsx"

# Names given to the seven indicator-attribute columns (positions 44:50 once
# the black separator column has been dropped).
attribute_names <- c(
  "Implementation", "SpatialLevel", "Frequency", "Relevance",
  "Feasability", "Clarity", "Credibility"
)

# Read one city sheet of the workbook.
#   sheet         name of the Excel sheet
#   drop_cols     column positions to remove (the black border column, plus an
#                 extra column for Genk)
#   indicator_col name of the indicator column; rows where it is NA are dropped
# Columns 3:43 hold the KPI scores; a score that was left open is stored as 0
# ('not relevant').
read_city_sheet <- function(sheet, drop_cols = 44, indicator_col = "Indicator") {
  sheet_data <- read_excel(file.path("data", filename), sheet = sheet, skip = 2) %>%
    select(-all_of(drop_cols)) %>%
    filter(!is.na(.data[[indicator_col]]))
  for (i in 3:43) {
    sheet_data[is.na(sheet_data[, i]), i] <- 0
  }
  sheet_data
}

city <- "Leipzig"
DataL <- read_city_sheet(sprintf("%s_stakeholders", city))
# Column 4 is renamed as well so that the column names agree across the sheets
# (rbind() below matches columns by name).
names(DataL)[c(4, 44:50)] <- c("Biophysical assemblages", attribute_names)
DataL$City <- city

city <- "Coimbra"
DataC <- read_city_sheet(city)
names(DataC)[44:50] <- attribute_names
DataC$City <- city

city <- "Vilnius"
DataV <- read_city_sheet(sprintf("%sMUNIC", city))
names(DataV)[c(4, 44:50)] <- c("Biophysical assemblages", attribute_names)
DataV$City <- city

# The Genk sheet has an additional second column, so the black border sits in
# column 45, and its indicator column is called `Indicator EN`.
city <- "Genk"
DataG <- read_city_sheet(city, drop_cols = c(2, 45), indicator_col = "Indicator EN")
names(DataG)[c(1, 44:50)] <- c("Indicator", attribute_names)
DataG$City <- city


##############################################################
#------- bind all dataset of each of the case studies--------#
##############################################################
Data <- rbind(DataC, DataG, DataL, DataV)

# Turn the attribute columns into factors with a fixed level order.
rating_levels <- c("Very low", "Low", "Medium", "High", "Very high")
Data <- Data %>%
  mutate(
    Implementation = factor(
      Implementation,
      levels = c(
        "Being measured and used",
        "Being measured but not used",
        "Potential indicator, easily implemented",
        "Potential indicator, difficult to implement",
        "Potential indicator, unsure if implementable"
      )
    ),
    SpatialLevel = factor(
      SpatialLevel,
      levels = c(
        "Higher", "National", "Regional", "Municipality",
        "Neighbourhood", "Project", "Household"
      )
    ),
    Frequency = factor(
      Frequency,
      levels = c(
        "One time",
        "Twice (before realization of the project and afterwards)",
        "Less than yearly",
        "Yearly or more",
        "Monthly or more"
      )
    ),
    Relevance = factor(Relevance, levels = rating_levels),
    Feasability = factor(Feasability, levels = rating_levels),
    Clarity = factor(Clarity, levels = rating_levels),
    Credibility = factor(Credibility, levels = rating_levels)
  )

# Datalong has two new columns: KPI holds the KPI that is linked to a specific
# indicator; specificity holds the score given to the indicator for that KPI
# (1 = weakly linked, 2 = strongly linked). Links scored 0 are not kept.
Datalong <- gather(Data, key = KPI, value = specificity, 3:43, factor_key = TRUE) %>%
  filter(specificity == 2 | specificity == 1)

#-----Aggregate results over all KPIs------#
kpi_cols <- 3:43
level1_names <- c(
  "Nature", "Quantity and quality of GBI", "Regulation NCP", "Material NCP",
  "Non-material NCP", "Cultural values of GBI", "Health and wellbeing",
  "Governance and justice", "Economic aspects"
)
level0_names <- c("Nature / (bio)physical environment", "Contributions", "People")
is_used <- Data$Implementation == "Being measured and used"
Agg <- data.frame(
  KPI = colnames(Data)[kpi_cols],
  Perfect = vapply(kpi_cols, function(x) sum(Data[[x]] == 2), integer(1)),
  Somewhat = vapply(kpi_cols, function(x) sum(Data[[x]] == 1), integer(1)),
  # Implementation can be missing; na.rm = TRUE keeps a single missing value
  # from turning the whole count into NA.
  UsedPerfect = vapply(
    kpi_cols, function(x) sum(Data[[x]] == 2 & is_used, na.rm = TRUE), integer(1)
  ),
  UsedSomewhat = vapply(
    kpi_cols, function(x) sum(Data[[x]] == 1 & is_used, na.rm = TRUE), integer(1)
  ),
  # The 41 KPI columns are grouped by position into 9 level-1 groups ...
  level1 = factor(
    rep(level1_names, times = c(5, 4, 10, 4, 2, 3, 4, 5, 4)),
    levels = level1_names
  ),
  # ... and into 3 level-0 groups.
  level0 = factor(
    rep(level0_names, times = c(9, 16, 16)),
    levels = level0_names
  )
)

# Attach the level-0 / level-1 group of each KPI to the long table.
kpi_row <- match(Datalong$KPI, Agg$KPI)
Datalong$level0 <- Agg$level0[kpi_row]
Datalong$level1 <- Agg$level1[kpi_row]
```

This report shows the preliminary results from the Indicators-KPI cross-list. 

# Data exploration
Before we try to model anything or delve into deeper analysis of the data, we explore the raw data.

## Number of indicators
Figure \@ref(fig:IndicatorsByKPI) below shows how many indicators can be somewhat (on the left) or perfectly linked to each of the KPIs of interest. Figure \@ref(fig:IndicatorsByKPI-city) shows the same for each of the cities separately. Note that there is some double counting in these graphs; if a certain indicator can assess more than one KPI, both Energy and Food for example, it is counted twice for the 'Material NCP' KPI.

Some preliminary conclusions:

* Among the indicators that can capture KPIs perfectly, most measure Nature.
* Among the indicators that can somewhat capture a KPI, most measure People KPIs.


```{r IndicatorsByKPI, fig.height=4, fig.width=10, fig.cap = "Number of indicators for each KPI"}
# Alternative (disabled) pie-donut version of this figure:
#p1 <- PieDonut(Datalong, aes(donuts = level1, pies = level0), showPieName=FALSE,labelposition=0)#+facet_wrap(Datalong$City)

# Fill colour of every level-1 KPI group; the order of this vector is also the
# order of the legend entries.
kpi_colours <- c(
  "Nature" = "springgreen4",
  "Quantity and quality of GBI" = "lightgreen",
  "Regulation NCP" = "cyan4",
  "Material NCP" = "cyan3",
  "Non-material NCP" = "cyan2",
  "Cultural values of GBI" = "darkred",
  "Health and wellbeing" = "firebrick3",
  "Governance and justice" = "lightcoral",
  "Economic aspects" = "pink"
)

# Stacked bar chart counting the rows of `rows` per level-0 KPI type, filled
# by level-1 KPI group.
kpi_bars <- function(rows, y_label) {
  ggplot(rows) +
    geom_bar(aes(x = level0, fill = level1)) +
    scale_fill_manual(name = "", limits = names(kpi_colours), values = kpi_colours) +
    ylab(y_label) +
    xlab("Type of KPI") +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw()
}

# Left panel: links scored 2; right panel: links scored 1.
p2 <- kpi_bars(
  Datalong[Datalong$specificity == 2, ],
  "Number of indicators that perfectly capture the KPI"
)
p3 <- kpi_bars(
  Datalong[Datalong$specificity == 1, ],
  "Number of indicators that somewhat capture the KPI"
)

# Give both panels the same y-axis range so the bar heights are comparable.
MAX <- max(layer_scales(p3)$y$range$range[2], layer_scales(p2)$y$range$range[2])
plot_grid(
  p2 + theme(legend.position = "none") + ylim(0, MAX),
  p3 + theme(legend.position = "none") + ylim(0, MAX),
  get_legend(p2),
  nrow = 1,
  rel_widths = c(1.5, 1.5, 0.7)
)
```
```{r IndicatorsByKPI-city, fig.height=15, fig.width=10, fig.cap = "Number of indicators for each KPI"}
# Alternative (disabled) pie-donut version of this figure:
#p1 <- PieDonut(Datalong, aes(donuts = level1, pies = level0), showPieName=FALSE,labelposition=0)#+facet_wrap(Datalong$City)

# Fill colour of every level-1 KPI group; the order of this vector is also the
# order of the legend entries.
kpi_colours <- c(
  "Nature" = "springgreen4",
  "Quantity and quality of GBI" = "lightgreen",
  "Regulation NCP" = "cyan4",
  "Material NCP" = "cyan3",
  "Non-material NCP" = "cyan2",
  "Cultural values of GBI" = "darkred",
  "Health and wellbeing" = "firebrick3",
  "Governance and justice" = "lightcoral",
  "Economic aspects" = "pink"
)

# Stacked bar chart counting the rows of `rows` per level-0 KPI type, filled
# by level-1 KPI group, with one facet row per city.
kpi_bars_by_city <- function(rows, y_label) {
  ggplot(rows) +
    geom_bar(aes(x = level0, fill = level1)) +
    scale_fill_manual(name = "", limits = names(kpi_colours), values = kpi_colours) +
    ylab(y_label) +
    xlab("Type of KPI") +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw() +
    facet_grid(rows = vars(City), switch = "y")
}

# Left panel: links scored 2; right panel: links scored 1.
p2 <- kpi_bars_by_city(
  Datalong[Datalong$specificity == 2, ],
  "Number of indicators that perfectly capture the KPI"
)
p3 <- kpi_bars_by_city(
  Datalong[Datalong$specificity == 1, ],
  "Number of indicators that somewhat capture the KPI"
)

# Give both panels the same y-axis range so the bar heights are comparable.
MAX <- max(layer_scales(p3)$y$range$range[2], layer_scales(p2)$y$range$range[2])
plot_grid(
  p2 + theme(legend.position = "none") + ylim(0, MAX),
  p3 + theme(legend.position = "none") + ylim(0, MAX),
  get_legend(p2),
  nrow = 1,
  rel_widths = c(1.5, 1.5, 0.7)
)
```

## Indicators that are used in practice

The next two graphs in Figure \@ref(fig:Indicatorsmeasured) show only those indicators that are measured and used in practice. What stands out is that KPIs to assess Nature / (bio)physical environment are much more prevalent in practice. This indicates that there is a clear bias towards these indicators. People and contribution indicators receive much less attention.


```{r Indicatorsmeasured, fig.height=4, fig.width=10, fig.cap= "Number of indicators that are measured and used."}
# TRUE for links whose indicator is already measured and used (NA when the
# implementation status is missing).
measuredused <- (Datalong$Implementation == "Being measured and used")
status_known <- !is.na(Datalong$Implementation)

# Fill colour of every level-1 KPI group; the order of this vector is also the
# order of the legend entries.
kpi_colours <- c(
  "Nature" = "springgreen4",
  "Quantity and quality of GBI" = "lightgreen",
  "Regulation NCP" = "cyan4",
  "Material NCP" = "cyan3",
  "Non-material NCP" = "cyan2",
  "Cultural values of GBI" = "darkred",
  "Health and wellbeing" = "firebrick3",
  "Governance and justice" = "lightcoral",
  "Economic aspects" = "pink"
)

# Stacked bar chart counting the rows of `rows` per level-0 KPI type, filled
# by level-1 KPI group.
kpi_bars <- function(rows, y_label) {
  ggplot(rows) +
    geom_bar(aes(x = level0, fill = level1)) +
    scale_fill_manual(name = "", limits = names(kpi_colours), values = kpi_colours) +
    ylab(y_label) +
    xlab("Type of KPI") +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw()
}

# Left panel: links scored 2; right panel: links scored 1.
p2 <- kpi_bars(
  Datalong[measuredused & Datalong$specificity == 2 & status_known, ],
  "Number of indicators that perfectly capture the KPI"
)
p3 <- kpi_bars(
  Datalong[measuredused & Datalong$specificity == 1 & status_known, ],
  "Number of indicators that somewhat capture the KPI"
)

# Give both panels the same y-axis range so the bar heights are comparable.
MAX <- max(layer_scales(p3)$y$range$range[2], layer_scales(p2)$y$range$range[2])
plot_grid(
  p2 + theme(legend.position = "none") + ylim(0, MAX),
  p3 + theme(legend.position = "none") + ylim(0, MAX),
  get_legend(p2),
  nrow = 1,
  rel_widths = c(1.5, 1.5, 0.7)
)
# Alternative (disabled) pie-donut version of this figure:
#PieDonut(Datalong[measuredused,], aes(donuts = level1, pies = level0), showPieName=FALSE,labelposition=0)#+facet_wrap(Datalong$City)
```

```{r Indicatorsmeasured2, fig.height=15, fig.width=10, fig.cap= "Number of indicators that are measured and used in each of the cities."}
# TRUE for links whose indicator is already measured and used (NA when the
# implementation status is missing).
measuredused <- (Datalong$Implementation == "Being measured and used")
status_known <- !is.na(Datalong$Implementation)

# Fill colour of every level-1 KPI group; the order of this vector is also the
# order of the legend entries.
kpi_colours <- c(
  "Nature" = "springgreen4",
  "Quantity and quality of GBI" = "lightgreen",
  "Regulation NCP" = "cyan4",
  "Material NCP" = "cyan3",
  "Non-material NCP" = "cyan2",
  "Cultural values of GBI" = "darkred",
  "Health and wellbeing" = "firebrick3",
  "Governance and justice" = "lightcoral",
  "Economic aspects" = "pink"
)

# Stacked bar chart counting the rows of `rows` per level-0 KPI type, filled
# by level-1 KPI group, with one facet row per city.
kpi_bars_by_city <- function(rows, y_label) {
  ggplot(rows) +
    geom_bar(aes(x = level0, fill = level1)) +
    scale_fill_manual(name = "", limits = names(kpi_colours), values = kpi_colours) +
    ylab(y_label) +
    xlab("Type of KPI") +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw() +
    facet_grid(rows = vars(City), switch = "y")
}

# Left panel: links scored 2; right panel: links scored 1.
p2 <- kpi_bars_by_city(
  Datalong[measuredused & Datalong$specificity == 2 & status_known, ],
  "Number of indicators that perfectly capture the KPI"
)
p3 <- kpi_bars_by_city(
  Datalong[measuredused & Datalong$specificity == 1 & status_known, ],
  "Number of indicators that somewhat capture the KPI"
)

# Give both panels the same y-axis range so the bar heights are comparable.
MAX <- max(layer_scales(p3)$y$range$range[2], layer_scales(p2)$y$range$range[2])
plot_grid(
  p2 + theme(legend.position = "none") + ylim(0, MAX),
  p3 + theme(legend.position = "none") + ylim(0, MAX),
  get_legend(p2),
  nrow = 1,
  rel_widths = c(1.5, 1.5, 0.7)
)
# Alternative (disabled) pie-donut version of this figure:
#PieDonut(Datalong[measuredused,], aes(donuts = level1, pies = level0), showPieName=FALSE,labelposition=0)#+facet_wrap(Datalong$City)
```

## Low-hanging fruit
Gathering and processing data is often time-consuming so it is perfectly understandable that park management chooses to focus on only a couple of indicators. However, we believe that there are quite some low-hanging fruits to improve the current indicator set. Firstly, some cities indicated that some data is being measured but not used in any way. Secondly, some indicators should be easy to implement, according to our contacts (see figures \@ref(fig:Indicatorslowfruit1) and \@ref(fig:Indicatorslowfruit2)).

```{r Indicatorslowfruit1, fig.height=6, fig.width=10, fig.cap= "Number of indicators that are either measured but not used or can easily be implemented"}
# TRUE for links whose indicator is easy to implement or is already measured
# but not used (NA when the implementation status is missing).
Lowfruit <- (Datalong$Implementation == "Potential indicator, easily implemented") |
  (Datalong$Implementation == "Being measured but not used")
lowfruit_known <- !is.na(Lowfruit)

# Fill colour of every level-1 KPI group; the order of this vector is also the
# order of the legend entries.
kpi_colours <- c(
  "Nature" = "springgreen4",
  "Quantity and quality of GBI" = "lightgreen",
  "Regulation NCP" = "cyan4",
  "Material NCP" = "cyan3",
  "Non-material NCP" = "cyan2",
  "Cultural values of GBI" = "darkred",
  "Health and wellbeing" = "firebrick3",
  "Governance and justice" = "lightcoral",
  "Economic aspects" = "pink"
)

# Stacked bar chart counting the rows of `rows` per level-0 KPI type, filled
# by level-1 KPI group.
kpi_bars <- function(rows, y_label) {
  ggplot(rows) +
    geom_bar(aes(x = level0, fill = level1)) +
    scale_fill_manual(name = "", limits = names(kpi_colours), values = kpi_colours) +
    ylab(y_label) +
    xlab("Type of KPI") +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw()
}

# Left panel: links scored 2; right panel: links scored 1.
p2 <- kpi_bars(
  Datalong[Lowfruit & Datalong$specificity == 2 & lowfruit_known, ],
  "Number of indicators that perfectly capture the KPI"
)
p3 <- kpi_bars(
  Datalong[Lowfruit & Datalong$specificity == 1 & lowfruit_known, ],
  "Number of indicators that somewhat capture the KPI"
)

# Give both panels the same y-axis range so the bar heights are comparable.
MAX <- max(layer_scales(p3)$y$range$range[2], layer_scales(p2)$y$range$range[2])
plot_grid(
  p2 + theme(legend.position = "none") + ylim(0, MAX),
  p3 + theme(legend.position = "none") + ylim(0, MAX),
  get_legend(p2),
  nrow = 1,
  rel_widths = c(1.5, 1.5, 0.7)
)
  # Alternative (disabled) pie-donut version of this figure:
  #PieDonut(Datalong[measured,], aes(donuts = level1, pies = level0), showPieName=FALSE,labelposition=0)#+facet_wrap(Datalong$City)
```
```{r Indicatorslowfruit2, fig.height=6, fig.width=10, fig.cap= "Number of indicators that are either measured but not used or can easily be implemented, by city"}
# TRUE for links whose indicator is easy to implement or is already measured
# but not used (NA when the implementation status is missing).
Lowfruit <- (Datalong$Implementation == "Potential indicator, easily implemented") |
  (Datalong$Implementation == "Being measured but not used")
rows_known <- !is.na(Lowfruit) & !is.na(Datalong$specificity)

# Fill colour of every level-1 KPI group; the order of this vector is also the
# order of the legend entries.
kpi_colours <- c(
  "Nature" = "springgreen4",
  "Quantity and quality of GBI" = "lightgreen",
  "Regulation NCP" = "cyan4",
  "Material NCP" = "cyan3",
  "Non-material NCP" = "cyan2",
  "Cultural values of GBI" = "darkred",
  "Health and wellbeing" = "firebrick3",
  "Governance and justice" = "lightcoral",
  "Economic aspects" = "pink"
)

# Stacked bar chart counting the rows of `rows` per level-0 KPI type, filled
# by level-1 KPI group, with one facet row per city.
kpi_bars_by_city <- function(rows, y_label) {
  ggplot(rows) +
    geom_bar(aes(x = level0, fill = level1)) +
    scale_fill_manual(name = "", limits = names(kpi_colours), values = kpi_colours) +
    ylab(y_label) +
    xlab("Type of KPI") +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
    theme_bw() +
    facet_grid(rows = vars(City), switch = "y")
}

# Left panel: links scored 2; right panel: links scored 1.
# Both subsets select ROWS: the trailing comma inside `[ , ]` is required,
# without it the logical mask would be applied to the columns of Datalong.
p2 <- kpi_bars_by_city(
  Datalong[Lowfruit & Datalong$specificity == 2 & rows_known, ],
  "Number of indicators that perfectly capture the KPI"
)
p3 <- kpi_bars_by_city(
  Datalong[Lowfruit & Datalong$specificity == 1 & rows_known, ],
  "Number of indicators that somewhat capture the KPI"
)

# Give both panels the same y-axis range so the bar heights are comparable.
MAX <- max(layer_scales(p3)$y$range$range[2], layer_scales(p2)$y$range$range[2])
plot_grid(
  p2 + theme(legend.position = "none") + ylim(0, MAX),
  p3 + theme(legend.position = "none") + ylim(0, MAX),
  get_legend(p2),
  nrow = 1,
  rel_widths = c(1.5, 1.5, 0.7)
)
  # Alternative (disabled) pie-donut version of this figure:
  #PieDonut(Datalong[measured,], aes(donuts = level1, pies = level0), showPieName=FALSE,labelposition=0)#+facet_wrap(Datalong$City)
```

# Data analysis
## A regression model
```{r regressionmodel, include=FALSE}
# 0/1 flags derived from the implementation status: `measured` is 1 when the
# status text contains "measured" (case-insensitive), `used` is 1 only for
# 'Being measured and used'. Both are NA when the status is missing.
Data$measured <- 1 * (str_detect(Data$Implementation, regex("measured", ignore_case = TRUE)))
Data$used <- 1 * (Data$Implementation == "Being measured and used")

# Number of KPIs an indicator captures perfectly (score 2) and at all
# (score > 0). The KPI scores live in columns 3:43 (the same range that is
# used to build Agg and Datalong), so all 41 KPI columns are counted here.
Data$PerfectNB <- rowSums(Data[, 3:43] == 2)
Data$TotalNB <- rowSums(Data[, 3:43] > 0)

library(INLA)
# Binomial model without intercept for `measured`, with the four quality
# ratings entered as the numeric codes of their factor levels and an iid
# random effect for the spatial level (PC prior on its precision).
m <- inla(
  measured ~ 0 + as.numeric(Credibility) + as.numeric(Clarity) +
    as.numeric(Feasability) + as.numeric(Relevance) +
    f(
      SpatialLevel,
      model = "iid",
      hyper = list(theta = list(prior = "pc.prec", param = c(0.5, 0.05)))
    ),
  # Disabled: additional iid random effect for the city.
  # + f(
  #  City,
  #  model = "iid",
  #  hyper = list(theta = list(prior = "pc.prec", param = c(0.5, 0.05)))
  #) +
  family = "binomial",
  data = Data,
  control.compute = list(config = TRUE)
)
  
```
A regression model to explain which type of indicators are usually chosen. The hypothesis is that people will choose indicators that have high feasibility / are easy to implement. 

## Principal component analysis (PCA)
```{r tests with PCA - ade4 package, silent=TRUE}
library(ade4)
library(factoextra) # plotting helpers for the PCA results

# The four quality ratings that enter the PCA, in the column order of Dpca.
rating_cols <- c("Relevance", "Feasability", "Clarity", "Credibility")

# Rows for which all four ratings are available.
a <- complete.cases(Data[, rating_cols])

# One unnamed column per rating, holding the numeric codes of the factor
# levels (1 = 'Very low' ... 5 = 'Very high').
Dpca <- do.call(
  cbind,
  lapply(rating_cols, function(col) as.numeric(unlist(Data[[col]])))
)
Dpca <- Dpca[a, ]

# PCA keeping three axes, without the interactive eigenvalue prompt.
pca1 <- dudi.pca(Dpca, scannf = FALSE, nf = 3)
```

Here, we visualize the eigenvalues in a scree plot in Figure \@ref(fig:ScreePlot) and map the indicators in Figure \@ref(fig:PCAresults).
```{r ScreePlot, fig.cap = "Eigenvalues in a scree plot"}
# Plot the eigenvalues of the PCA stored in `pca1`.
fviz_eig(pca1)
```

```{r PCAresults, fig.cap="Mapping of the indicators. Indicators with a similar profile are grouped together. The color represents the quality of representation."}
# Map of the individuals (rows of the PCA input) drawn as points and coloured
# on a three-colour gradient by their cos2 value.
fviz_pca_ind(pca1,
             col.ind = "cos2", # cos2 = the quality of the individuals on the factor map
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),repel = TRUE, geom="point")     # repel = TRUE: avoid text overlapping
```

```{r PCAresults2, fig.cap="Mapping of the indicators. Indicators with a similar profile are grouped together. The color represents the group of indicators."}
# Map of the individuals drawn as points, grouped by the city of the rows
# selected by `a`, with an ellipse per city (ellipse.level = 0.95).
# NOTE(review): `a` must still be the row filter that was used to build the
# input of `pca1` - confirm when chunks are run out of order.
fviz_pca_ind(pca1,
            habillage = as.factor(unlist(Data[a, 'City'])),repel = TRUE, geom="point", addEllipses=TRUE, ellipse.level=0.95)     # repel = TRUE: avoid text overlapping
```

```{r PCAresults3, fig.cap="Mapping of the indicators. Indicators with a similar profile are grouped together. The color represents the group of indicators."}
# Plot of the PCA variables of `pca1`.
# NOTE(review): `habillage` is given one city label per row of Data[a, ],
# i.e. per individual, while this call plots the variables. The grouping and
# the ellipses were presumably copied from the individuals plot - confirm what
# this figure is meant to show and whether the call runs as intended.
fviz_pca_var(pca1,
            habillage = as.factor(unlist(Data[a, 'City'])),repel = TRUE, geom="point", addEllipses=TRUE, ellipse.level=0.90)     # repel = TRUE: avoid text overlapping
```

### PCA with IVs
IVs would be feasibility, credibility, clarity and relevance

PCA would be the KPIs that are measured.
```{r testPCA-ade4Package, silent=TRUE}
library(ade4)
library(factoextra) # to visualize the pca results

# Rows without missing values in columns 3:45 and 47:51 of Data (column 46 is
# deliberately left out of the check).
a <- complete.cases(Data[, c(3:45, 47:51)])

# Candidate instrumental variables: numeric codes of the four quality ratings.
IVpca <- cbind(as.numeric(unlist(Data$Relevance)),
               as.numeric(unlist(Data$Feasability)),
               as.numeric(unlist(Data$Clarity)),
               as.numeric(unlist(Data$Credibility)))

# PCA on the KPI score columns (3:43) of the complete rows, keeping three axes.
# Note that this overwrites the `pca1` object of the ratings PCA.
Dpca <- Data[a, 3:43]
IVpca <- IVpca[a, ]
pca1 <- dudi.pca(Dpca, scannf = FALSE, nf = 3)

# TODO: the analysis that should use IVpca was never written. The chunk used
# to end in an unfinished assignment (`wit1<-`), which is a syntax error; it
# is disabled here until the intended call is known.
# wit1 <-
```


## Clustering of the indicators
Here, we cluster the indicators according to the type of KPIs that they are capable of measuring. It makes sense to choose indicators that cover different KPIs; if several indicators measure the same KPIs, it might be better to choose only one of them.

```{r Clustering, silent=TRUE, include=FALSE}
library(cluster)
library(vegan)
library(aplpack)
library(fpc)
# Tabulate cluster-validity statistics for cutting a hierarchical tree into
# 2, 3, ..., k clusters.
#
# Arguments:
#   dist  dissimilarities, passed as `d` to cluster.stats()
#   tree  hierarchical clustering, passed to cutree()
#   k     largest number of clusters to evaluate (a single number >= 2)
#
# Value:
#   A data frame with one column per cut ("Test 1" is the 2-cluster cut,
#   "Test 2" the 3-cluster cut, ...). The first rows hold the statistics named
#   in `clust.assess`, followed by one row per cluster with its size (0 when a
#   cut has fewer clusters). Numeric columns are rounded to 2 decimals.
cstats.table <- function(dist, tree, k) {
  # 2:k would silently count downwards for k < 2.
  stopifnot(is.numeric(k), length(k) == 1L, k >= 2)

  clust.assess <- c("cluster.number", "n", "within.cluster.ss", "average.within",
                    "average.between", "wb.ratio", "dunn2", "avg.silwidth")
  clust.size <- c("cluster.size")
  row.clust <- paste("Cluster-", seq_len(k), " size")

  # Column i holds the results of the i-cluster cut; column 1 stays empty and
  # is dropped below.
  output.stats <- matrix(ncol = k, nrow = length(clust.assess))
  cluster.sizes <- matrix(ncol = k, nrow = k)
  for (i in 2:k) {
    # cluster.stats() is computed once per cut and reused for all statistics
    # and cluster sizes.
    stats <- cluster.stats(d = dist, clustering = cutree(tree, k = i))
    output.stats[, i] <- unlist(stats[clust.assess])[seq_along(clust.assess)]
    # Sizes beyond the i-th cluster do not exist and come out as NA.
    cluster.sizes[, i] <- unlist(stats[clust.size])[seq_len(k)]
  }

  output.stats.df <- data.frame(output.stats)
  cluster.sizes <- data.frame(cluster.sizes)
  cluster.sizes[is.na(cluster.sizes)] <- 0

  # drop = FALSE keeps a data frame when only one cut remains (k == 2).
  output <- rbind(output.stats.df, cluster.sizes)[, -1, drop = FALSE]
  colnames(output) <- paste("Test", seq_len(k - 1))
  rownames(output) <- c(clust.assess, row.clust)

  is.num <- vapply(output, is.numeric, logical(1))
  output[is.num] <- lapply(output[is.num], round, 2)
  output
}

library(cluster) 
# Recode every KPI score column (3:43) of Data in place: any value other than
# 1 or 2 is set to 0, then the column is converted to a factor with the levels
# '0', '1' and '2'.
# NOTE(review): this permanently changes Data, so numeric operations on these
# columns no longer work after this chunk has run.
for(i in 3:43){
  Data[!(Data[,i]==1 | Data[,i]==2),i]<-0
  Data[,i]<-factor(as.factor(unlist(Data[,i])),levels=c('0','1','2'))
}
# Pairwise Gower dissimilarities between the rows of Data, based on the
# recoded KPI columns only.
gower.dist <- daisy(Data[ ,3:43], metric = c("gower"))

```
## cost-value
value= how many indicators are measured
Cost= feasibility

## Spider graphs
How do feasibility/clarity/


## DEA?