1

I was able to change the encoding of a data frame using the code provided in "R- Changing encoding of column in dataframe?", but I would like to use map instead of a for loop. I wasn't able to do so and can't figure out why. I intend to use it in a piped workflow, where for loops would be very cumbersome.

# Example of the data:
# A small data frame with four character columns. FANTASIA and RAZ_SOCI hold
# accented letters as raw single-byte escapes (e.g. \xca); presumably these are
# Latin-1 bytes whose encoding has not been declared yet, which is what the
# question sets out to fix.
dat <- structure(list(CNES = c("0137162", "0137170", "0137189", "0137197", 
                               "0137200", "0137219"), CPF_CNPJ = c("87768735000148", "03005201000170", 
                                                                   "00000000000000", "00000000000000", "00000000000000", "87775334000115"
                               ), FANTASIA = c("HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA", "COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA", 
                                               "UNIDADE SANIT\xc1RIA 03 - PR\xc9DIO HOSPITAL DA LIGA", "UNIDADE SANIT\xc1RIA 11 - BOSQUE", 
                                               "PROGRAMA DE SA\xdaDE DA FAM\xcdLIA 01 - BAIRRO PROMORAR", "SIND TRAB IND ALIMENTA\xc7\xc3O"
                               ), RAZ_SOCI = c("CNPJ 87.768.735/0001-48-HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA", 
                                               "CNPJ 03.005.201/0001-70-COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA", 
                                               "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL", 
                                               "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL", 
                                               "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL", 
                                               "CNPJ 87.775.334/0001-15-SINDICATO DOS TRABALHADORES NA INDUSTRIA DA ALIMENTA\xc7\xc3O"
                               )), row.names = c("1", "2", "3", "4", "5", "6"), class = "data.frame")

# Using map
# NOTE: this is the attempt the question is about and it does not produce the
# re-encoded data frame. Per the answer that follows, the lambda would also
# have to return the modified column (.x) after the assignment.
dat %>% 
  map(~ Encoding(.x) <-  "latin1")

0

2 Answers 2

2

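We need to return the data as well: the lambda has to end with `.x`, so that the modified column (and not just the value of the assignment) is what gets returned.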

library(dplyr)
library(purrr)
# Mark the character columns as Latin-1 and bind the results back together
# (map_dfc() returns a tibble). The function must end with the column itself:
# an assignment on its own evaluates to the assigned value ("latin1"), not to
# the modified vector.
dat1 <- dat %>%
  map_dfc(function(column) {
    # `Encoding<-` raises an error for non-character vectors, so columns of
    # any other type are passed through untouched.
    if (is.character(column)) {
      Encoding(column) <- "latin1"
    }
    column
  })

This can also be done in a single line by calling the replacement function `Encoding<-` directly

# Call the replacement function `Encoding<-` directly: it returns the
# re-marked vector, so no separate "return the column" step is needed.
dat1 <- map_dfc(dat, function(column) `Encoding<-`(column, "latin1"))

Or without a lambda call

# No lambda needed: map_dfc() forwards the extra argument to `Encoding<-`,
# which receives it as the encoding to set on each column.
map_dfc(dat, `Encoding<-`, value = "latin1")

Checking the structure of the original data and of the updated data:

# Original data: a plain data.frame; in the output below the accented
# characters still appear as raw byte escapes such as \xca.
str(dat)
#'data.frame':  6 obs. of  4 variables:
# $ CNES    : chr  "0137162" "0137170" "0137189" "0137197" ...
# $ CPF_CNPJ: chr  "87768735000148" "03005201000170" "00000000000000" "00000000000000" ...
# $ FANTASIA: chr  "HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA" "COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "UNIDADE SANIT\xc1RIA 03 - PR\xc9DIO HOSPITAL DA LIGA" "UNIDADE SANIT\xc1RIA 11 - BOSQUE" ...
# $ RAZ_SOCI: chr  "CNPJ 87.768.735/0001-48-HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA" "CNPJ 03.005.201/0001-70-COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" ...


# Updated data: the result of map_dfc() is a tibble, and the strings that were
# marked as Latin-1 now display their accented letters.
str(dat1)
#tibble [6 × 4] (S3: tbl_df/tbl/data.frame)
# $ CNES    : chr [1:6] "0137162" "0137170" "0137189" "0137197" ...
# $ CPF_CNPJ: chr [1:6] "87768735000148" "03005201000170" "00000000000000" "00000000000000" ...
# $ FANTASIA: chr [1:6] "HOSPITAL DE CARIDADE E BENEFICÊNCIA" "COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "UNIDADE SANITÁRIA 03 - PRÉDIO HOSPITAL DA LIGA" "UNIDADE SANITÁRIA 11 - BOSQUE" ...
# $ RAZ_SOCI: chr [1:6] "CNPJ 87.768.735/0001-48-HOSPITAL DE CARIDADE E BENEFICÊNCIA" "CNPJ 03.005.201/0001-70-COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" ...

Or check by column

# Declared encoding of every string before the change: all "unknown".
sapply(dat, Encoding)
#     CNES      CPF_CNPJ  FANTASIA  RAZ_SOCI 
#[1,] "unknown" "unknown" "unknown" "unknown"
#[2,] "unknown" "unknown" "unknown" "unknown"
#[3,] "unknown" "unknown" "unknown" "unknown"
#[4,] "unknown" "unknown" "unknown" "unknown"
#[5,] "unknown" "unknown" "unknown" "unknown"
#[6,] "unknown" "unknown" "unknown" "unknown"


# After the change only the strings that contain non-ASCII bytes report
# "latin1"; the ASCII-only strings (all of CNES and CPF_CNPJ, and e.g. row 2 of
# FANTASIA) still report "unknown", because R never marks pure-ASCII strings
# with a declared encoding.
sapply(dat1, Encoding)
#     CNES      CPF_CNPJ  FANTASIA  RAZ_SOCI 
#[1,] "unknown" "unknown" "latin1"  "latin1" 
#[2,] "unknown" "unknown" "unknown" "unknown"
#[3,] "unknown" "unknown" "latin1"  "unknown"
#[4,] "unknown" "unknown" "latin1"  "unknown"
#[5,] "unknown" "unknown" "latin1"  "unknown"
#[6,] "unknown" "unknown" "latin1"  "latin1" 

Or we can use across as well

# Mark only the character columns as Latin-1. `Encoding<-` raises an error for
# non-character vectors, so selecting everything() would fail as soon as the
# data frame contains a numeric, logical or factor column.
dat1 <- dat %>%
  mutate(across(where(is.character), ~ `Encoding<-`(.x, "latin1")))
Sign up to request clarification or add additional context in comments.

1 Comment

My difficulty was related to the assign operation. Very helpful
2

Here is a data.table option which might work and help

# setDT() converts `dat` to a data.table by reference (the original object is
# modified, not copied). lapply() over .SD then applies `Encoding<-` with
# "latin1" to each column and returns the result as a new data.table, which is
# printed here rather than assigned.
setDT(dat)[, lapply(.SD, `Encoding<-`, "latin1")]

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.