0

I have a data frame named toy like so:

    # Example data: 10 rows. `Alt` holds pipe-delimited aliases and is NA for the
    # last three rows. The `spec` attribute is carried over from the dput() output.
    toy <- structure(
      list(
        id = 1:10,
        Name = c("A", "B", "C", "D", "E", "F", "G", "H", "A", "A"),
        Alt = c(
          "X|Y|a", "O|P|dev", "A|W|are", "M|Q|G", "H|f|j|i_m|am",
          "L|E|B|i|j", "x|C|xx|yy", NA, NA, NA
        ),
        Place = c(1L, 4L, 8L, 12L, 13L, 8L, 3L, 1L, 1L, 1L)
      ),
      .Names = c("id", "Name", "Alt", "Place"),
      class = c("tbl_df", "tbl", "data.frame"),
      row.names = c(NA, -10L),
      spec = structure(
        list(
          cols = structure(
            list(
              id = structure(
                list(),
                class = c("collector_integer", "collector")
              ),
              Name = structure(
                list(),
                class = c("collector_character", "collector")
              ),
              Alt = structure(
                list(),
                class = c("collector_character", "collector")
              ),
              Place = structure(
                list(),
                class = c("collector_integer", "collector")
              )
            ),
            .Names = c("id", "Name", "Alt", "Place")
          ),
          default = structure(
            list(),
            class = c("collector_guess", "collector")
          )
        ),
        .Names = c("cols", "default"),
        class = "col_spec"
      )
    )

My goal is to find values in the Name column that also appear in the Alt column. I have tried the following using tidyr and dplyr:

# Split Alt on "|" into alias columns A-E (keeping Alt), stack those columns
# into long form, then look each alias up in Name. match() reports only the
# first matching position.
toy_sep <- toy %>%
  separate(
    Alt,
    into = LETTERS[1:5],
    sep = "\\|",
    extra = "merge",
    remove = FALSE
  ) %>%
  gather(Alias_id, Alias, A:E) %>%
  mutate(Match = match(Alias, Name))

From this, the rows where there is a match can be extracted like so:

# Keep only the rows of toy_sep that contain no NA in any column.
matches <- toy_sep[complete.cases(toy_sep), ]

This is close to what I want. However, the problem is that match returns only the first location, while I want all the matches. In the example, 1 is returned in the Match column of the matches data frame for A, but I want all the ids: A has ids 9 and 10 (from the id column in the toy data frame) as well as 1. Thanks for any help using base/data.table/dplyr.

Adding the desired output. Note that the numbers in the top-right cell need not be separated by a "|":

# Desired result: one row per alias that equals some Name, with Match holding
# every matching position joined by "|".
d_out <- structure(
  list(
    id = c(3L, 5L, 6L, 7L, 4L, 6L),
    Name = c("C", "E", "F", "G", "D", "F"),
    Alt = c(
      "A|W|are", "H|f|j|i_m|am", "L|E|B|i|j",
      "x|C|xx|yy", "M|Q|G", "L|E|B|i|j"
    ),
    Place = c(8L, 13L, 8L, 3L, 12L, 8L),
    Alias_id = c("A", "A", "B", "B", "C", "C"),
    Alias = c("A", "H", "E", "C", "G", "B"),
    Match = c("1|9|10", "8", "5", "3", "7", "2")
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -6L),
  .Names = c("id", "Name", "Alt", "Place", "Alias_id", "Alias", "Match"),
  spec = structure(
    list(
      cols = structure(
        list(
          id = structure(
            list(),
            class = c("collector_integer", "collector")
          ),
          Name = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Alt = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Place = structure(
            list(),
            class = c("collector_integer", "collector")
          ),
          Alias_id = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Alias = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Match = structure(
            list(),
            class = c("collector_character", "collector")
          )
        ),
        .Names = c(
          "id", "Name", "Alt", "Place", "Alias_id", "Alias", "Match"
        )
      ),
      default = structure(
        list(),
        class = c("collector_guess", "collector")
      )
    ),
    .Names = c("cols", "default"),
    class = "col_spec"
  )
)
1
  • 1
    Could you please include your desired output for this example data? Commented Mar 29, 2017 at 14:44

1 Answer 1

1

Try this.

  # Split Alt on "|" into alias columns A-E (keeping Alt), stack them into long
  # form, and for every alias collect ALL row positions of `toy` whose Name is
  # exactly that alias, joined by "|" (NA when there is none).
  toy_sep <- toy %>%
    separate(
      Alt,
      into = LETTERS[1:5],
      sep = "\\|",
      extra = "merge",
      remove = FALSE
    ) %>%
    gather(Alias_id, Alias, A:E) %>%
    mutate(
      Match = vapply(
        Alias,
        function(alias) {
          # Exact comparison instead of grep(): a regex lookup would also hit
          # Names that merely contain the alias, match every row for an empty
          # alias, and misread metacharacters (e.g. a "|" left in the merged
          # fifth column). which() drops the NA produced by an NA alias.
          hits <- which(toy$Name == alias)
          if (length(hits) > 0) {
            paste0(hits, collapse = "|")
          } else {
            NA_character_
          }
        },
        character(1),
        USE.NAMES = FALSE
      )
    )
  # Rows without a match carry NA in Match and are dropped here.
  matches <- toy_sep[complete.cases(toy_sep), ]
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.