I am having trouble setting up a nested loop for data cleaning. Each semester (Fall, Spring, Summer) has three data sets that need to be combined, and this has to be repeated over multiple years (2018, 2019, 2020, 2021, 2022). The catch is that I don't need Spring and Summer 2018 because they are part of a different academic year, and the data are not available for Summer 2023. The goal is to have each semester's data combined and cleaned. Two of the groups of data sets, Fall 2020 and Spring 2021, are available here: [https://drive.google.com/drive/folders/156RqWANAu2wL-Tr-x_wsdZ_qjEgM_5jM?usp=share_link][1].
# Combine and clean the three per-semester data sets for every semester/year
# combination, storing each cleaned table in `Prep_` under a key such as
# "FA20" (semester code followed by the two-digit year).
#
# NOTE(review): this assumes `ASPH_Grad_`, `ID_Grad_` and `PhGrad_` are named
# lists of data frames keyed the same way ("FA20", "SP21", ...) -- confirm
# against the code that loads the files. Two-index subsetting such as
# `x[[Sem, Yr]]` is matrix-style indexing and does not look up one data set
# per semester/year.
Semester <- c("FA", "SP", "SU")
Year <- c("18", "19", "20", "21", "22")

# Create the result container only if an earlier step has not already done so.
if (!exists("Prep_")) {
  Prep_ <- list()
}

for (Sem in Semester) {
  for (Yr in Year) {
    # Skip the combinations that are not wanted. `Year` holds two-digit
    # strings, so the comparison must use "18"/"22" (comparing against
    # "2018"/"2022" never matches and nothing would be skipped).
    # NOTE(review): the skipped summer term is 2022 here, as in the original
    # condition -- confirm this is the term with no data.
    if ((Sem == "SU" && Yr == "22") ||
        (Sem %in% c("SP", "SU") && Yr == "18")) {
      next
    }

    key <- paste0(Sem, Yr)

    # Warn and move on when any of the three inputs for this term is absent,
    # instead of failing in the middle of the pipeline.
    if (is.null(ASPH_Grad_[[key]]) || is.null(ID_Grad_[[key]]) ||
        is.null(PhGrad_[[key]])) {
      warning("Skipping ", key, ": one or more input data sets are missing",
              call. = FALSE)
      next
    }

    Prep_[[key]] <- bind_rows(ASPH_Grad_[[key]], ID_Grad_[[key]]) %>%
      left_join(., PhGrad_[[key]], by = c("ID" = "BannerID")) %>%
      distinct(ID, Program, .keep_all = TRUE) %>%
      # Use PHGRAD.Degree when present, otherwise fall back to Degree.
      mutate(New_Deg = coalesce(PHGRAD.Degree, Degree)) %>%
      # Count how many race flags are "Y" per student. The range relies on the
      # column order between `Race-Am Ind` and `Race- Caucasian`.
      rowwise() %>%
      mutate(
        racecount = sum(
          c_across(`Race-Am Ind`:`Race- Caucasian`) == "Y",
          na.rm = TRUE
        )
      ) %>%
      ungroup() %>%
      mutate(
        # case_when() takes the first matching branch, so the order below is
        # the precedence: citizenship, then Hispanic, then multiple races,
        # then the single-race flags.
        racecode = case_when(
          Citizenship %in% c("NN", "NV") ~ "foreign_national",
          `Race- Hispanic` == "Y" ~ "hispanic_latino",
          racecount > 1 ~ "two_or_more_races",
          `Race-Am Ind` == "Y" ~ "american_indian_alaskan_native",
          `Race- Asian` == "Y" ~ "asian",
          `Race-Afr Amer` == "Y" ~ "black_african_american",
          `Race- Hawaiian` == "Y" ~ "native_hawaiian_pacific_islander",
          `Race- Caucasian` == "Y" ~ "white",
          `Race-Not Rept` == "Y" ~ "race_unknown",
          TRUE ~ "race_unknown"
        ),
        # NOTE(review): "N" maps to "Other" but the fallback is lower-case
        # "other"; left unchanged -- confirm whether the casing is intended.
        gender_long = case_when(
          Gender == "F" ~ "Female",
          Gender == "M" ~ "Male",
          Gender == "N" ~ "Other",
          TRUE ~ "other"
        ),
        # Map program codes to departments; unlisted codes become "Missing".
        DEPT = case_when(
          Program %in% c("3GPH363AMS", "3GPH363AMSP", "3GPH378AMCD",
                         "3GPH378AMS", "3GPH379APHD") ~ "COMD",
          Program %in% c("3GPH593AMPH", "3GPH593AMS", "3GPH593APHD",
                         "3GPH569ACGS") ~ "ENHS",
          Program %in% c("3GPH596AMS", "3GPH596AMSPH", "3GPH596APHD",
                         "3GPH594AMPH", "3GPH594AMS", "3GPH594AMSPH",
                         "3GPH594APHD", "3GPH586APBAC") ~ "EPID/BIOS",
          Program %in% c("3GPH331AMS", "3GPH331APHD", "3GPH334AMS",
                         "3GPH335ADPT", "3GPH377AMS", "3GPH388AMS",
                         "3GPH588AMPH", "3GPHJ331MS", "3UPH331ABS") ~ "EXSC",
          Program %in% c("3GPH592AMPH", "3GPH592APHD", "3GPH576CGS",
                         "3GPH121CGS", "3GID635CGS") ~ "HPEB",
          Program %in% c("3GPH591AMPH", "3GPH591APHD",
                         "3GPH597AMHA") ~ "HSPM",
          TRUE ~ "Missing"
        ),
        degree_delivery_type = case_when(
          `First Concentration` == "R999" |
            `Second Concentration` == "R999" ~ "Distance-based",
          `First Concentration` == "3853" |
            `Second Concentration` == "3853" ~ "Executive",
          TRUE ~ "Campus-based"
        ),
        # Full-time equivalent: hours over the full-time load (9 for GR,
        # 12 for UG), capped at 1. Rows with any other Level, or missing
        # hours, get NA because there is no fallback branch.
        FTE_compute = case_when(
          Level == "GR" & `Course Hours` < 9 ~ `Course Hours` / 9,
          Level == "GR" & `Course Hours` >= 9 ~ 1,
          Level == "UG" & `Course Hours` < 12 ~ `Course Hours` / 12,
          Level == "UG" & `Course Hours` >= 12 ~ 1
        ),
        Full_Part_Status = case_when(
          (Level == "GR" & `Course Hours` < 9) |
            (Level == "UG" & `Course Hours` < 12) ~ "parttime_status",
          (Level == "GR" & `Course Hours` >= 9) |
            (Level == "UG" & `Course Hours` >= 12) ~ "fulltime_status",
          TRUE ~ "other"
        ),
        # Term label such as "FA_20". sep = "_" avoids the stray spaces that
        # paste(Sem, "_", Yr) would insert ("FA _ 20").
        AcademicYear = paste(Sem, Yr, sep = "_"),
        StudentCount = 1
      )
    # The cleaned table for this term is now available as Prep_[[key]].
  }
}