Append Multiple CSV Files into One File Using R

Append multiple csv files into one file using R

Sample CSV Files

Note
CSV files to be merged here have

- equal number of columns

- same column names

- same order of columns

- number of rows can be different

1st csv file abc.csv

A,B,C,D
1,2,3,4
2,3,4,5
3,4,5,6
1,1,1,1
2,2,2,2
44,44,44,44
4,4,4,4
4,4,4,4
33,33,33,33
11,1,11,1

2nd csv file pqr.csv

A,B,C,D
1,2,3,40
2,3,4,50
3,4,50,60
4,4,4,4
5,5,5,5
6,6,6,6

List FILENAMES of CSV Files

Note
The path below E:/MergeCSV/ has just the files to be merged. No other csv files. So in this path, there are only two csv files, abc.csv and pqr.csv

## List filenames to be merged.
## `pattern` is a regular expression, not a shell glob: "*.csv" would also
## match names such as "mycsv.txt", so match the ".csv" extension explicitly
## and anchor it to the end of the file name.
filenames <- list.files(path = "E:/MergeCSV/", pattern = "\\.csv$")

## Print filenames to be merged
print(filenames)
## [1] "abc.csv" "pqr.csv"

FULL PATH to CSV Files

## Build the full path to each csv file
fullpath <- file.path("E:/MergeCSV", filenames)

## Show the resulting paths
print(fullpath)
## [1] "E:/MergeCSV/abc.csv" "E:/MergeCSV/pqr.csv"

MERGE CSV Files

## Merge listed files from the path above.
## Read through `fullpath`: the bare names in `filenames` only resolve when
## the working directory happens to be E:/MergeCSV.
dataset <- do.call("rbind", lapply(fullpath, read.csv))

## Print the merged csv dataset; if it's large, use the `head()` function to get a glimpse of the merged dataset
dataset
# A B C D
# 1 1 2 3 4
# 2 2 3 4 5
# 3 3 4 5 6
# 4 1 1 1 1
# 5 2 2 2 2
# 6 44 44 44 44
# 7 4 4 4 4
# 8 4 4 4 4
# 9 33 33 33 33
# 10 11 1 11 1
# 11 1 2 3 40
# 12 2 3 4 50
# 13 3 4 50 60
# 14 4 4 4 4
# 15 5 5 5 5
# 16 6 6 6 6

head(dataset)
# A B C D
# 1 1 2 3 4
# 2 2 3 4 5
# 3 3 4 5 6
# 4 1 1 1 1
# 5 2 2 2 2
# 6 44 44 44 44

## Print dimension of merged dataset
dim(dataset)
## [1] 16 4

Merge multiple .csv files into one

# Get the list of csv files in the working directory.
# Without a pattern, list.files() returns every file, so non-csv files
# would be passed to read.csv() as well.
file_list <- list.files(pattern = "\\.csv$")

# Read each csv file into a list of data frames
ldf <- lapply(file_list, read.csv)

# Combine the data frames in the list into a single data frame
df.final <- do.call("rbind", ldf)

merge multiple .csv files - R

You can use :

# Read every file as a semicolon-separated table, then stack the rows
data_csv <- do.call(
  rbind,
  lapply(myfiles, function(path) read.csv(path, sep = ";"))
)

Or with purrr's map_df

# Read every file with read.csv(sep = ";") and row-bind the results
data_csv <- purrr::map_df(myfiles, read.csv, sep = ";")

If there are a lot of files, you can use data.table functions.

library(data.table)
# Read each file with fread() and stack the resulting tables
data_csv <- rbindlist(lapply(myfiles, function(path) fread(path)))

Combining multiple csv files together in an r loop

Here are a couple of ways to count the values in the Type column of each file, add a new column with the filename, and bind the outputs together.

Using base R :

# Match only names that end in ".csv"; `pattern` is a regular expression,
# so the glob-style "*.csv" would also match names such as "mycsv.txt".
files <- list.files(pattern = "\\.csv$", full.names = TRUE)

# For each file: count the occurrences of every value in `Type`, tag the
# counts with the source file name, then stack the per-file tables.
new_data <- do.call(rbind, lapply(files, function(x) {
  # skip = 1 drops the first line of the file; the next line is the header
  mydata <- read.csv(x, skip = 1, header = TRUE)
  transform(
    as.data.frame(table(mydata$Type)),
    filename = basename(x)
  )
}))

and with tidyverse :

library(dplyr)

# Per file: count rows by `Type`, record the source file name, and let
# map_df() row-bind the per-file results.
new_data <- purrr::map_df(files, function(path) {
  # Skip the first line; the following line supplies the column names
  contents <- read.csv(path, header = TRUE, skip = 1)
  type_counts <- count(contents, Type)
  mutate(type_counts, filename = basename(path))
})

Combine multiple csv files (similar names) from different folders into one csv and bind them row wise

You can use list.files with pattern and recursive = TRUE.

# `pattern` is a regular expression; the trailing `$` stops it from also
# matching names that merely contain "abc.csv" (e.g. "abc.csv.bak").
filenames <- list.files(pattern = "abc\\.csv$", recursive = TRUE)
# Read every match and row-bind the results; `.id = "id"` adds an `id`
# column recording which input each row came from.
result <- purrr::map_df(filenames, read.csv, .id = "id")

`result` will hold all the data combined in one data frame, with an additional column `id` that distinguishes the rows coming from each file (and therefore each folder).

Combine and summarise multiple CSV files from one folder in R

Assuming your CSVs are in a folder ./data, get a vector of the file names.

library(tidyverse)

# `pattern` is a regular expression, not a glob: match the ".csv" extension
filenames <- list.files("./data", pattern = "\\.csv$", full.names = TRUE)

Create a function that reads in the data. Read in the first line to get the name and the second line to get the number. Then read in the rest of the data, skipping the first two rows.

# Summarise one CSV file as a single-row table.
#
# The second field of line 1 and of line 2 become the `name` and `number`
# columns. The remaining table is read after skipping those two lines and
# must contain a DATE column that lubridate::dmy_hm() can parse.
# The result holds: name, number, count (rows), start_year, end_year and
# year_count (end_year - start_year).
process_csv <- function(filename) {
  header_name <- read_csv(filename, col_names = FALSE, n_max = 1)[[2]]
  header_number <- read_csv(
    filename,
    skip = 1, col_names = FALSE, n_max = 1
  )[[2]]

  # Derive the calendar year of every DATE value
  readings <- mutate(
    read_csv(filename, skip = 2),
    year = lubridate::year(lubridate::dmy_hm(DATE))
  )

  yearly <- summarise(
    readings,
    count = n(),
    start_year = min(year),
    end_year = max(year),
    year_count = end_year - start_year
  )

  # Put the two header values in front of the summary columns
  mutate(yearly, name = header_name, number = header_number, .before = 1)
}

Apply the function and save the output.

# Run process_csv() on every file and row-bind the per-file results
result <- map_dfr(filenames, process_csv)

# Save the combined summary to result.csv (relative to the working directory)
write_csv(result, "result.csv")

# Print the combined summary
result
#> # A tibble: 2 x 6
#> name number count start_year end_year year_count
#> <chr> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 NEW_PLACE_1 123 281 1972 2020 48
#> 2 NEW_PLACE_2 155 393 1961 2020 59


Related Topics



Leave a reply



Submit