# Raw emptying records entered inline: nine rows, seven columns.
# NA marks a value that is missing for that row.
# NOTE(review): type_of_facility mixes label styles ("Pit latrines" vs
# "unlined_pit_latrines" / "lined_pit_latrines") -- confirm whether these
# should be harmonised before analysis.
raw_data <- data.frame(
  number_of_people = c(5, 25, 50, 50, 17, 17, 80, 80, 6),
  type_of_facility = c(
    "Septic tank",
    "Pit latrines",
    "unlined_pit_latrines",
    "unlined_pit_latrines",
    "lined_pit_latrines",
    "lined_pit_latrines",
    "Septic tank",
    "Septic tank",
    "unlined_pit_latrines"
  ),
  size_of_team = c(6, 4, 4, 4, 7, 7, 7, 7, 5),
  emptying_method = c(
    "Barrel-based",
    "Barrel-based",
    "Barrel-based",
    "Barrel-based",
    "Pump to tank",
    "Pump to tank",
    "Barrel-based",
    "Pump to tank",
    "Pump to tank"
  ),
  pumping_technology = c(
    "Pitvaq",
    "Manual",
    "Improved manual",
    "Improved manual",
    "Pupu pump",
    NA,
    "Pupu pump",
    "Pupu pump",
    "Pupu pump"
  ),
  volume_removed = c(5, 720, 1820, 1520, 1600, 1600, 800, 800, NA),
  sludge_type = c(
    "Thicker - Like ketchup or yoghurt",
    "Thicker - Like ketchup or yoghurt",
    "Watery - Like water",
    "Watery - Like water",
    "Slightly thicker - Like cooking oil",
    "Slightly thicker - Like cooking oil",
    "Slightly thicker - Like cooking oil",
    "Slightly thicker - Like cooking oil",
    NA
  )
)
# Build the cleaned table: drop rows that have no people count, then keep the
# seven columns under shorter names (new_name = old_name).
processed_data <- raw_data |>
  filter(!is.na(number_of_people)) |>
  select(
    num_people    = number_of_people,
    facility_type = type_of_facility,
    team_size     = size_of_team,
    method        = emptying_method,
    tech_used     = pumping_technology,
    volume        = volume_removed,
    sludge        = sludge_type
  )
# Save the cleaned table as CSV and report where it went.
# The project-relative path is defined once so the file that is written and
# the message that is printed cannot drift apart.
output_file <- "data/processed/FAR_TM_Cleaned_20251201.csv"
output_path <- here::here(output_file)

# write_csv() does not create missing folders, so make sure the target
# directory exists first; this is a no-op when it is already there.
dir.create(dirname(output_path), recursive = TRUE, showWarnings = FALSE)

write_csv(processed_data, output_path)
cat("Processed data has been saved to ", output_file, "\n", sep = "")