library(data.table)
# Number of times each benchmarked expression is repeated.
replications = 10

# Daily CSV files, expected to be named "<city>_<YYYY-MM-DD>.csv".
# `pattern` is a regular expression (not a shell glob), so match the literal
# ".csv" extension at the end of the file name.
filepaths = list.files("data/daily/", pattern = "\\.csv$", full.names = TRUE)

# Derive city and date from the bare file name rather than from the full
# path, so the result does not depend on how the directory prefix (and its
# separators) is rendered in `filepaths`.
# City: everything before the trailing "_<date>.csv".
locations = sub("_....-..-..\\.csv$", "", basename(filepaths))
# Date: drop the extension, then everything up to the last underscore.
dates = sub("^.*_", "", sub("\\.csv$", "", basename(filepaths)))
# Read every daily CSV with data.table::fread(), tag each table with its
# city and date, and stack the results into a single data.table.
#
# Arguments (all default to the script-level vectors, so `get_dt()` keeps
# working unchanged):
#   paths  - character vector of CSV file paths.
#   cities - city label for each path (same length as `paths`).
#   days   - date label for each path (same length as `paths`).
#
# Returns a data.table with the columns of the input files plus `city` and
# `date`; an empty data.table when `paths` is empty.
get_dt = function(paths = filepaths, cities = locations, days = dates) {
  stopifnot(
    length(cities) == length(paths),
    length(days) == length(paths)
  )
  # seq_along() yields an empty sequence for zero files, whereas
  # 1:length(paths) would iterate over c(1, 0) and try to read NA.
  tables = lapply(seq_along(paths), function(i) {
    dt = fread(paths[i])
    # set() takes the value from this function's scope; with `:=` a CSV
    # column that happens to be named like a local variable would win.
    set(dt, j = "city", value = cities[i])
    set(dt, j = "date", value = days[i])
    dt
  })
  rbindlist(tables)
}
# Read every daily CSV with base read.csv(), tag each data frame with its
# city and date, and stack the results into a single data.frame.
#
# Arguments (all default to the script-level vectors, so `get_df()` keeps
# working unchanged):
#   paths  - character vector of CSV file paths.
#   cities - city label for each path (same length as `paths`).
#   days   - date label for each path (same length as `paths`).
#
# Returns a data.frame with the columns of the input files plus `city` and
# `date`; an empty data.frame when `paths` is empty.
get_df = function(paths = filepaths, cities = locations, days = dates) {
  stopifnot(
    length(cities) == length(paths),
    length(days) == length(paths)
  )
  # Nothing to read: do.call(rbind, list()) would give NULL, so return an
  # empty data.frame explicitly.
  if (length(paths) == 0) {
    return(data.frame())
  }
  # seq_along() instead of 1:length() keeps the index range safe, and
  # lapply() builds the list in one pass without growing it.
  frames = lapply(seq_along(paths), function(i) {
    df = read.csv(paths[i])
    df$city = cities[i]
    df$date = days[i]
    df
  })
  do.call(rbind, frames)
}
# Here we use rbenchmark to benchmark the two functions defined above.
library(rbenchmark)
# Time the data.table loader against the base R loader and append an
# `average` column: elapsed time divided by the number of replications.
transform(
  benchmark(
    get_dt(),
    get_df(),
    replications = replications,
    columns = c('test', 'replications', 'elapsed', "relative")
  ),
  average = elapsed / replications
)
## test replications elapsed relative average
## 2 get_df() 10 507.317 2.581 50.7317
## 1 get_dt() 10 196.544 1.000 19.6544
# Build the combined table once; both writers below serialise this object.
weather = get_dt()

# Time data.table's fwrite() against base write.csv(). Both expressions
# target the same "weather.csv", so every run overwrites the previous file.
# `average` is elapsed time divided by the number of replications.
transform(
  benchmark(
    fwrite(weather, "weather.csv"),
    write.csv(weather, "weather.csv"),
    replications = replications,
    columns = c('test', 'replications', 'elapsed', "relative")
  ),
  average = elapsed / replications
)
## test replications elapsed relative average
## 1 fwrite(weather, "weather.csv") 10 0.737 1.000 0.0737
## 2 write.csv(weather, "weather.csv") 10 8.794 11.932 0.8794
# Time data.table's fread() against base read.csv() on "weather.csv".
# Each benchmarked expression assigns its result to `weather`, so the
# variable is rebound on every replication.
# `average` is elapsed time divided by the number of replications.
transform(
  benchmark(
    weather <- fread("weather.csv"),
    weather <- read.csv("weather.csv"),
    replications = replications,
    columns = c('test', 'replications', 'elapsed', "relative")
  ),
  average = elapsed / replications
)
## test replications elapsed relative average
## 1 weather <- fread("weather.csv") 10 2.961 1.000 0.2961
## 2 weather <- read.csv("weather.csv") 10 19.681 6.647 1.9681