Goal: Compare two functional-programming styles (purrr vs. dplyr/tidyr) for parsing strings.
Input: The cabin column in Titanic data (Kaggle)
Output: The number extracted from the first component of the space-delimited string (0 if that component contains no number)
Code:
#################################################
library(microbenchmark)
library(tidyverse)
library(stringr)
# Benchmark two ways of deriving Cabin_Number from the Cabin column:
# take the first space-delimited token, strip its letters, convert what is
# left to an integer, and fall back to 0 when the conversion yields NA.
#
# Both expressions store an integer vector and both strip *every* letter
# (str_replace_all), so they compute the same result and the timing
# comparison is like-for-like.
#
# NOTE(review): trainDfRaw is not defined in this snippet; it must already
# exist in the calling environment with a Cabin column.
# NOTE(review): any previously published timings were measured before the
# two expressions were aligned; re-run to refresh them.
pc <- microbenchmark(
  # purrr style: map over the Cabin values one element at a time.
  purrrV = {
    trainDf0 <- trainDfRaw
    trainDf0$Cabin_Number <- trainDf0$Cabin %>%
      as.character() %>%
      map(str_split, " ") %>%
      # str_split() returns a list per element; drop that extra level.
      flatten() %>%
      # Keep only the first token of each cabin string.
      map_chr(1) %>%
      map_chr(str_replace_all, "[[:alpha:]]", "") %>%
      # map_int() (not map()) so the column is an integer vector, not a list.
      map_int(as.integer) %>%
      map_int(coalesce, 0L)
  },
  # dplyr/tidyr style: whole-column operations.
  dplyV = {
    trainDf1 <- trainDfRaw
    trainDf1 <- trainDf1 %>%
      mutate(Cabin_Number = as.character(Cabin)) %>%
      # Keep only the first space-delimited token; extra pieces are dropped.
      separate(
        Cabin_Number,
        into = c("Cabin_Number"),
        sep = " ",
        extra = "drop",
        remove = TRUE
      ) %>%
      mutate(
        Cabin_Number = coalesce(
          # str_replace_all() to match purrrV; str_replace() would remove
          # only the first letter of the token.
          as.integer(str_replace_all(Cabin_Number, "[[:alpha:]]", "")),
          0L
        )
      )
  }
)
Result
Unit: milliseconds
expr min lq mean median uq max neval
purrrV 1363.32589 1405.10168 1607.33881 1429.10920 1561.6362 3453.50236 100
dplyV 15.69765 16.69651 19.83311 17.46755 19.8518 79.28219 100
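The dplyr version is much faster than the purrr version: the median run time is about 17 ms versus about 1429 ms, roughly 80 times faster.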
No comments:
Post a Comment