-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrecomposition_data_2020_2.R
104 lines (85 loc) · 4.11 KB
/
recomposition_data_2020_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
library(readxl)
library(openxlsx)
library(tidyverse)
# Feature database (version 2): the table the two lookups below are joined to.
data_2020_2 <- read_excel("data/BDD_OS_2020_2_v2.xlsx")

# Significance lookup: keep the join key and the significance label,
# title-case the label, then drop rows that became identical.
significance_2020_2 <-
  read_excel("data/Feature_significance_2020_2.xlsx") %>%
  select(`Feature ID`, `RCU_Feature Significance`) %>%
  mutate(
    `RCU_Feature Significance` = str_to_title(`RCU_Feature Significance`)
  ) %>%
  distinct()

# Condition assessment lookup: keep the join key and the three disturbance
# columns, without repeated rows.
condition_assessment_2020_2 <-
  read_excel(
    "data/2021_08_13_condition_assessment_2020_2_compilation.xlsx"
  ) %>%
  select(
    `Feature ID`,
    disturbance_causes,
    disturbance_categories,
    disturbance_effects
  ) %>%
  distinct()
# Diagnostic only (the result is not stored): rows of the condition assessment
# lookup whose `Feature ID` already appeared on an earlier row. Any ID listed
# here matches more than once in the join below.
condition_assessment_2020_2 %>%
  mutate(duplicata = duplicated(`Feature ID`)) %>%
  filter(duplicata)

# Attach the significance label and the disturbance columns to each feature,
# matching `OS_Number` in the database against `Feature ID` in the lookups.
data_2020_2_revu <- data_2020_2 %>%
  left_join(significance_2020_2, by = c("OS_Number" = "Feature ID")) %>%
  left_join(condition_assessment_2020_2, by = c("OS_Number" = "Feature ID"))
# Save the joined table as version 3 of the database, on a sheet named "DATA".
# openxlsx::write.xlsx() has no `append` argument (that one belongs to
# xlsx::write.xlsx()): it always writes a complete workbook. State the
# replacement of an existing file explicitly with `overwrite` instead.
list_of_dataset <- list("DATA" = data_2020_2_revu)
write.xlsx(
  list_of_dataset,
  file = "data/BDD_OS_2020_2_v3.xlsx",
  overwrite = TRUE
)
#### Elements that need to be reviewed ####
# Start again from the version 3 workbook.
data_2020_2 <- read_excel("data/BDD_OS_2020_2_v3.xlsx")
# Show each distinct `Main periods` label once (result is not stored).
data_2020_2 %>% distinct(`Main periods`)
# Normalise the text columns of the feature table in a single mutate().
# Expressions inside mutate() are evaluated in order, so each step sees the
# columns as rewritten by the steps above it.
data_2020_2 <- data_2020_2 %>%
  mutate(
    `Description date` = as.character(`Description date`),
    `Site accessibility` = str_to_title(`Site accessibility`),
    `Threat Levels` = str_to_title(`Threat Levels`),
    # Step 1: collapse the Late Ottoman / Modern variants and fix the case of
    # "unknown"; every other label is kept as it is.
    `Main periods` = case_when(
      `Main periods` %in% c(
        "Late Ottoman|Modern",
        "Late Ottoman|Kingdom of Saudi Arabia",
        "Late Ottoman|Contemporary|Late Ottoman|Modern|WWI|Kingdom of Saudi Arabia|WWII|Modern",
        "Late Ottoman|Modern|WWI|Kingdom of Saudi Arabia|WWII|Modern",
        "Contemporary|Late Ottoman|Modern|WWI|Kingdom of Saudi Arabia|WWII|Modern"
      ) ~ "Islamic|Modern",
      `Main periods` == "Late Ottoman" ~ "Islamic",
      `Main periods` == "unknown" ~ "Unknown",
      TRUE ~ `Main periods`
    ),
    # Step 2: derive `Periods` from the step-1 labels. Anything not listed
    # (including "Modern", "Unknown" and missing values) becomes "Unknown".
    Periods = case_when(
      `Main periods` == "Islamic|Modern" ~ "Late Ottoman|Unknown",
      `Main periods` == "Islamic" ~ "Late Ottoman",
      `Main periods` == "unknown|Dadanite" ~ "Unknown|Dadanite",
      `Main periods` %in% c("Nabataean Kingdom", "Dadanite") ~ `Main periods`,
      TRUE ~ "Unknown"
    ),
    # Step 3: now that `Periods` holds the detailed label, replace the
    # remaining detailed labels in `Main periods` with their broader group.
    `Main periods` = case_when(
      `Main periods` == "unknown|Dadanite" ~ "Unknown|Iron Age/Pre-Classical",
      `Main periods` == "Dadanite" ~ "Iron Age/Pre-Classical",
      `Main periods` %in% c("Pre-Islamic", "Nabataean Kingdom") ~
        "Classical/Pre-Islamic",
      TRUE ~ `Main periods`
    ),
    # First non-missing of: `Description TH`, `Description Corrigée`, the
    # existing `Description`; "x" when all three are missing.
    Description = coalesce(
      `Description TH`,
      `Description Corrigée`,
      Description,
      "x"
    )
  )
#### adding real names ####
# Lookup table from abbreviation to full name; `noms` is the first name and
# the last name joined by a single space.
noms_des_gens <-
  read_excel("data/liste_noms/liste_noms_abreviations.xlsx") %>%
  mutate(noms = paste(Prénom, Nom)) %>%
  select(ABREVIATION, noms)
# Replace the "|"-separated abbreviations in `People names` with full names.
# The superseded scoped verbs group_by_at()/summarise_at() are written with
# across() and a plain summarise(); the result is the same.
data_2020_2 <- data_2020_2 %>%
  # One row per record and abbreviation.
  separate_rows(`People names`, sep = "\\|") %>%
  # Look up the full name; abbreviations missing from the lookup give NA.
  left_join(noms_des_gens, by = c("People names" = "ABREVIATION")) %>%
  mutate(`People names` = noms) %>%
  select(-noms) %>%
  # Drop unmatched abbreviations so paste() below does not emit "NA".
  # NOTE(review): a record with no matching abbreviation at all loses every
  # row here and is therefore absent from the output -- confirm this is
  # intended.
  filter(!is.na(`People names`)) %>%
  # Collapse back to one row per record: group on every other column and join
  # the names with "|".
  group_by(across(-`People names`)) %>%
  summarise(
    `People names` = paste(`People names`, collapse = "|"),
    .groups = "drop"
  ) %>%
  relocate(`People names`, .after = surveyGrid) %>%
  # A cell that is exactly "UCOP Team" is treated as missing.
  naniar::replace_with_na(
    replace = list(`People names` = c("UCOP Team"))
  )
# Save the cleaned table as version 4 of the database, on a sheet named "DATA".
# openxlsx::write.xlsx() has no `append` argument (that one belongs to
# xlsx::write.xlsx()): it always writes a complete workbook. State the
# replacement of an existing file explicitly with `overwrite` instead.
list_of_dataset <- list("DATA" = data_2020_2)
write.xlsx(
  list_of_dataset,
  file = "data/BDD_OS_2020_2_v4.xlsx",
  overwrite = TRUE
)