1

I have a data frame named A which are company names and cities, like

A:

name    city 
IBM      NY
AMZ      NY
ALI      SH
TAOBAO   London

Also a data frame B

B:

ID       NAME1    CITY1   NAME2   CITY2
1        LENVO            AMZ
2        APPLE            HUAWEI  
3        AMZ              ALI
4        XIAOMI           VIVO
5        IBM              GOOGLE

In data B, I have two columns of company names, accordingly, two city columns which are blank (there are 10k in my real dataset with 6 name-city pairs) , what I want is that get the city name in dataset B from dataset A based on the name match, which the result C should be as below in this example: C

ID       NAME1    CITY1   NAME2   CITY2
1        LENVO            AMZ     NY
2        APPLE            HUAWEI   
3        AMZ      NY      ALI     SH
4        XIAOMI           TAOBAO  London
5        IBM      NY      GOOGLE

how to make it? Thanks in advance for any suggestion!

3 Answers 3

1

You can use match over 'NAME' columns to get corresponding city names from A dataframe.

city_cols <- grep('CITY', names(B)) 
name_cols <- grep('NAME', names(B)) 

B[cols] <- lapply(B[name_cols], function(x) A$city[match(x, A$name)])
B

#  ID  NAME1 CITY1  NAME2 CITY2
#1  1  LENVO  <NA>    AMZ    NY
#2  2  APPLE  <NA> HUAWEI  <NA>
#3  3    AMZ    NY    ALI    SH
#4  4 XIAOMI  <NA>   VIVO  <NA>
#5  5    IBM    NY GOOGLE  <NA>

data

A <- structure(list(name = c("IBM", "AMZ", "ALI", "TAOBAO"), city = c("NY", 
"NY", "SH", "London")), class = "data.frame", row.names = c(NA, -4L))

B <- structure(list(ID = 1:5, NAME1 = c("LENVO", "APPLE", "AMZ", "XIAOMI", 
"IBM"), CITY1 = c("", "", "", "", ""), NAME2 = c("AMZ", "HUAWEI", 
"ALI", "VIVO", "GOOGLE"), CITY2 = c("", "", "", "", "")), 
row.names = c(NA, -5L), class = "data.frame")
Sign up to request clarification or add additional context in comments.

Comments

1

Base R

B <- merge(B, A, by.x = "NAME1", by.y = "name", all.x = TRUE)
B$CITY1 <- B$city
B$city <- NULL
B <- merge(B, A, by.x = "NAME2", by.y = "name", all.x = TRUE)
B$CITY2 <- B$city
B$city <- NULL
B
#    NAME2  NAME1 ID CITY1 CITY2
# 1    ALI    AMZ  3    NY    SH
# 2    AMZ  LENVO  1  <NA>    NY
# 3 GOOGLE    IBM  5    NY  <NA>
# 4 HUAWEI  APPLE  2  <NA>  <NA>
# 5   VIVO XIAOMI  4  <NA>  <NA>
B <- B[,c("ID", "NAME1", "CITY1", "NAME2", "CITY2")]
B <- B[order(B$ID),]
B
#   ID  NAME1 CITY1  NAME2 CITY2
# 2  1  LENVO  <NA>    AMZ    NY
# 4  2  APPLE  <NA> HUAWEI  <NA>
# 1  3    AMZ    NY    ALI    SH
# 5  4 XIAOMI  <NA>   VIVO  <NA>
# 3  5    IBM    NY GOOGLE  <NA>

dplyr

library(dplyr)
left_join(B, A, by = c("NAME1" = "name")) %>%
  mutate(CITY1 = city) %>%
  select(-city) %>%
  left_join(A, by = c("NAME2" = "name")) %>%
  mutate(CITY2 = city) %>%
  select(-city)
#   ID  NAME1 CITY1  NAME2 CITY2
# 1  1  LENVO  <NA>    AMZ    NY
# 2  2  APPLE  <NA> HUAWEI  <NA>
# 3  3    AMZ    NY    ALI    SH
# 4  4 XIAOMI  <NA>   VIVO  <NA>
# 5  5    IBM    NY GOOGLE  <NA>

Comments

0

Using pivot_longer/pivot_wider

library(tidyverse)

df %>%
 select(-starts_with("CITY")) %>%
 pivot_longer(!ID, names_prefix="NAME") %>%
 left_join(df_cities, by=c("value"="name")) %>%
 pivot_wider(values_from=c("value", "city"))
Joining, by = "name"
# A tibble: 5 x 5
     ID name_1 name_2 city_1 city_2
  <int> <fct>  <fct>  <fct>  <fct> 
1     1 LENVO  AMZ    NA     NY    
2     2 APPLE  HUAWEI NA     NA    
3     3 AMZ    ALI    NY     SH    
4     4 XIAOMI VIVO   NA     NA    
5     5 IBM    GOOGLE NY     NA   

DATA

read.table(text="name city
IBM NY
AMZ NY
ALI SH
TAOBAO London", header=T) -> df_cities
read.table(text="ID NAME1 CITY1 NAME2 CITY2
1 LENVO '' AMZ ''
2 APPLE '' HUAWEI ''
3 AMZ '' ALI ''
4 XIAOMI '' VIVO ''
5 IBM '' GOOGLE ''", header=T) -> df

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.