library(tibble)
library(dplyr)
library(readr)
library(here)
# Build the service coverage dataset ----
#
# The table has one row per (service_type, service_level, residence_type)
# combination. Rows come in blocks of three: each service group is listed
# once per residence type, in the order total, rural, urban.
#
# Helper vectors are kept inside local() so that the script only defines
# `processed_data` at top level.
processed_data <- local({
  residence_levels <- c("total", "rural", "urban")

  # One entry per block of three rows, in row order.
  service_groups <- tribble(
    ~service_type,    ~service_level,
    "Hygiene",        "Basic service",
    "Sanitation",     "Basic service",
    "Drinking water", "Basic service",
    "Hygiene",        "Limited service",
    "Sanitation",     "Limited service",
    "Drinking water", "Limited service",
    "Hygiene",        "No handwashing facility",
    "Sanitation",     "Open defecation",
    "Drinking water", "Surface water",
    "Sanitation",     "Safely managed service",
    "Drinking water", "Safely managed service",
    "Sanitation",     "Unimproved",
    "Drinking water", "Unimproved"
  )

  # Values below are laid out one service group per line, as
  # total, rural, urban.
  # NOTE(review): coverage looks like a percentage (all values lie in
  # 0-100) -- confirm the unit against the data source.
  coverage <- c(
    45.48, 31.31, 54.99, # Hygiene / Basic service
    12.82, 2.99, 19.40,  # Sanitation / Basic service
    46.98, 59.11, 38.84, # Drinking water / Basic service
    44.86, 56.98, 36.73, # Hygiene / Limited service
    41.55, 31.36, 48.39, # Sanitation / Limited service
    4.48, 9.78, 0.92,    # Drinking water / Limited service
    9.66, 11.72, 8.28,   # Hygiene / No handwashing facility
    17.79, 30.76, 9.10,  # Sanitation / Open defecation
    4.22, 10.51, 0.00,   # Drinking water / Surface water
    18.76, 20.44, 17.64, # Sanitation / Safely managed service
    42.94, 17.16, 60.23, # Drinking water / Safely managed service
    9.07, 14.45, 5.47,   # Sanitation / Unimproved
    1.38, 3.44, 0.00     # Drinking water / Unimproved
  )

  # NA marks the two urban rows where no population figure is given
  # (both have coverage 0.00).
  population <- c(
    15658992, 4327143, 11331848, # Hygiene / Basic service
    4412014, 413732, 3998282,    # Sanitation / Basic service
    16174213, 8170136, 8004077,  # Drinking water / Basic service
    15442597, 7874851, 7567745,  # Hygiene / Limited service
    14305984, 4334764, 9971219,  # Sanitation / Limited service
    1541579, 1351057, 190522,    # Drinking water / Limited service
    3325825, 1619234, 1706592,   # Hygiene / No handwashing facility
    6126340, 4250777, 1875563,   # Sanitation / Open defecation
    1452437, 1452437, NA,        # Drinking water / Surface water
    6459799, 2825133, 3634665,   # Sanitation / Safely managed service
    14783610, 2372024, 12411585, # Drinking water / Safely managed service
    3123277, 1996823, 1126455,   # Sanitation / Unimproved
    475575, 475576, NA           # Drinking water / Unimproved
  )

  n_residence <- length(residence_levels)
  n_groups <- nrow(service_groups)

  # Fail early if a value was dropped or duplicated while editing the
  # numeric vectors; otherwise rows would misalign with their labels.
  stopifnot(
    length(coverage) == n_groups * n_residence,
    length(population) == n_groups * n_residence
  )

  # Columns are created directly in their final order, so no placeholder
  # description columns or follow-up select() are needed.
  tibble(
    service_type = rep(service_groups$service_type, each = n_residence),
    coverage = coverage,
    population = population,
    residence_type = rep(residence_levels, times = n_groups),
    service_level = rep(service_groups$service_level, each = n_residence),
    # NOTE(review): region labels are generated sequentially, one per
    # service group ("Region 1" ... "Region 13") -- confirm this grouping
    # is intended rather than a placeholder.
    Region = rep(paste0("Region ", seq_len(n_groups)), each = n_residence)
  )
})

# Print the dataset to confirm its contents
print(processed_data)