Assign Headers Based on Existing Row in Dataframe in R

R data.frame : headers based on existing row containing text and numbers

This happens because, in your initial data frame, V5 is a column of type "int" , not a factor (so you have two different types in your first row)

#> str(df)
#'data.frame':  4 obs. of  9 variables:
# $ V1: Factor w/ 4 levels "254every","TEST",..: 2 3 1 4
# $ V2: Factor w/ 4 levels "187","4","7",..: 4 2 3 1
# $ V3: Factor w/ 4 levels "0","141","6",..: 4 1 3 2
# $ V4: Factor w/ 4 levels "0","140","486text",..: 3 1 4 2
# $ V5: int  725 0 3 129
# $ V6: Factor w/ 4 levels "0","10","130",..: 4 1 2 3
# $ V7: Factor w/ 4 levels "0","157","6",..: 4 1 3 2
# $ V8: Factor w/ 4 levels "0","10","138",..: 4 1 2 3
# $ V9: Factor w/ 4 levels "1","10","168",..: 4 1 2 3

All elements of a vector must be of the same type. When you try to unlist() and store the value in a vector to pass to colnames(), you actually pass a "int" vector (because R coerces the elements to a common type):

#> str(unlist(df[1,]))
# Named int [1:9] 2 4 4 3 725 4 4 4 4
# - attr(*, "names")= chr [1:9] "V1" "V2" "V3" "V4" ...

If you modify the structure of your data frame to specify that column V5 is a factor, your initial method would work:

df[,5] <- as.factor(df[,5])
colnames(df) <- unlist(df[1,])

You would get:

#> df
#      TEST this45  is 486text 725 with ca257 some numbers
#1     TEST this45  is 486text 725 with ca257 some numbers
#2 number45      4   0       0   0    0     0    0       1
#3 254every      7   6       6   3   10     6   10      10
#4    where    187 141     140 129  130   157  138     168

If you don't want to modify your column types, you could apply as.character() to each element of the first row before coercing to a vector and passing to colnames():

colnames(df) <- lapply(df[1,], as.character)

Which results:

#> df
#      TEST this45  is 486text 725 with ca257 some numbers
#1     TEST this45  is 486text 725 with ca257 some numbers
#2 number45      4   0       0   0    0     0    0       1
#3 254every      7   6       6   3   10     6   10      10
#4    where    187 141     140 129  130   157  138     168

Data

structure(list(V1 = structure(c(2L, 3L, 1L, 4L), .Label = c("254every", 
"TEST", "number45", "where"), class = "factor"), V2 = structure(c(4L, 
2L, 3L, 1L), .Label = c("187", "4", "7", "this45"), class = "factor"), 
    V3 = structure(c(4L, 1L, 3L, 2L), .Label = c("0", "141", 
    "6", "is"), class = "factor"), V4 = structure(c(3L, 1L, 4L, 
    2L), .Label = c("0", "140", "486text", "6"), class = "factor"), 
    V5 = c(725L, 0L, 3L, 129L), V6 = structure(c(4L, 1L, 2L, 
    3L), .Label = c("0", "10", "130", "with"), class = "factor"), 
    V7 = structure(c(4L, 1L, 3L, 2L), .Label = c("0", "157", 
    "6", "ca257"), class = "factor"), V8 = structure(c(4L, 1L, 
    2L, 3L), .Label = c("0", "10", "138", "some"), class = "factor"), 
    V9 = structure(c(4L, 1L, 2L, 3L), .Label = c("1", "10", "168", 
    "numbers"), class = "factor")), .Names = c("V1", "V2", "V3", 
"V4", "V5", "V6", "V7", "V8", "V9"), class = "data.frame", row.names = c("1", 
"2", "3", "4"))

How to change the first row to be the header in R?

If you don't want to re-read the data into R (which it seems like you don't from the comments), you can do the following. I had to add some zeros to get your data to read completely, so disregard those.

dat
##       V2        V3        V4        V5           V6  V7           V8    V9    V10
## 17   Zip CuCurrent PaCurrent PoCurrent      Contact Ext          Fax email Status
## 18 74136         0         1         0 918-491-6998   0 918-491-6659     0      1
## 19 30329         1         0         0 404-321-5711   0            0     0      1
## 20 74136         1         0         0 918-523-2516   0 918-523-2522     0      1
## 21 80203         0         1         0 303-864-1919   0            0     0      1
## 22 80120         1         0         0 345-098-8890 456            0     0      1

First take the first row as the column names. Next remove the first row. Finish it off by converting the columns to their appropriate types.

names(dat) <- as.matrix(dat[1, ])
dat <- dat[-1, ]
dat[] <- lapply(dat, function(x) type.convert(as.character(x)))
dat
##     Zip CuCurrent PaCurrent PoCurrent      Contact Ext          Fax email Status
## 1 74136         0         1         0 918-491-6998   0 918-491-6659     0      1
## 2 30329         1         0         0 404-321-5711   0            0     0      1
## 3 74136         1         0         0 918-523-2516   0 918-523-2522     0      1
## 4 80203         0         1         0 303-864-1919   0            0     0      1
## 5 80120         1         0         0 345-098-8890 456            0     0      1

How to convert header into data raw in R?

Assuming you can't import your data properly using that function (and I strongly recommend that you read the documentation for that function throughly, as the argument you're looking for is very likely to exist - it likely just has a different name than in read.table) you can access the "header" using colnames, then just rbind it on top of your data:

df2 <- rbind(colnames(mtcars), mtcars)
head(df2)

                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
1                  mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4           21   6  160 110  3.9  2.62 16.46  0  1    4    4
Mazda RX4 Wag       21   6  160 110  3.9 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85  2.32 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15  3.44 17.02  0  0    3    2

Then you can assign new column names with colnames(df2) <- ...:

# Assign numbers as column names
colnames(df2) <- paste0('V', seq_len(ncol(df2)))
head(df2)

                    V1  V2   V3  V4   V5    V6    V7 V8 V9  V10  V11
1                  mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4           21   6  160 110  3.9  2.62 16.46  0  1    4    4
Mazda RX4 Wag       21   6  160 110  3.9 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85  2.32 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15  3.44 17.02  0  0    3    2

making the first row a header in a dataframe in r

Works for me (with a little trick).

x <- read.table(text = "File Fp1.PD_ShortSOA_FAM Fp1.PD_LongSOA_FAM Fp1.PD_ShortSOA_SEMplus_REAL Fp1.PD_ShortSOA_SEMplus_FICT
    sub0001            0,446222          2,524,804            0,272959                    1,281,349
    sub0002           1,032,688          2,671,048           1,033,278                    1,217,817",
                header = TRUE)

x <- t(x)
colnames(x) <- x[1, ]
x <- x[-1, ]
x

                             sub0001     sub0002    
Fp1.PD_ShortSOA_FAM          "0,446222"  "1,032,688"
Fp1.PD_LongSOA_FAM           "2,524,804" "2,671,048"
Fp1.PD_ShortSOA_SEMplus_REAL "0,272959"  "1,033,278"
Fp1.PD_ShortSOA_SEMplus_FICT "1,281,349" "1,217,817"

Create new data frame with column names as row names, and values from one column as new column names

Example using base t() and tidyverse + magrittr

k <- tibble(Station = 1:4,
       year = c(2016,2016,2016,2017),
       month = c(1,3,7,2),
       rise = c(334,348,242,445),
       set = c(1042,1053,1153,962),
       hrday = c("X1.134","X14.22", "X23.233","X10.753"))

new_df <- k %>% select(-Station,-hrday) %>% t() %>% as_tibble() %>%
  magrittr::set_colnames(c(k$hrday)) %>%
  mutate(var =subset(colnames(k),!colnames(k) %in% c("hrday","Station"))) %>%
  dplyr::select(var, X1.134:X10.753)

new_df

R : assign a row as column names to a dataframe

Did you get this with read.csv or read.table? Either have an option for header = T to read in with the first row as column names if not the below should work.

names(db) <- db[1,]

If you want to delete the first row use:

db <- db[-1,]

Modify a data.frame with sub-headers in R

This can be a way to do it. We use that header_type logic to create a 1 for the first blank line, a 2 for the second blank line, and a 0 elsewhere. The 1 corresponds to a header, and the 2 to a subheader.

From there, we can just fill down into the blank rows.

library(dplyr)
library(tidyr)

data %>% 
  mutate(is_blank = Description == "",
         header_type = (is_blank + lag(is_blank, default = 0)) * is_blank,
         Header = if_else(header_type == 1, Type, NA_character_),
         SubHeader = if_else(header_type == 2, Type, NA_character_)) %>% 
  fill(Header) %>%
  group_by(Header) %>% 
  fill(SubHeader) %>% 
  filter(header_type == 0) %>% 
  select(Type, Header, SubHeader, Description, Gene)

# # Groups:   Header [2]
# Type  Header SubHeader Description Gene 
# <chr> <chr>  <chr>     <chr>       <chr>
#   1 1     A      B         DNA1        rp1  
#   2 2     C      NA        DNA2        rp2  
#   3 3     C      NA        DNA3        rp3  
#   4 4     C      NA        DNA4        rp4

data


Type=c("A", "B", 1, "C", 2, 3, 4)
Description=c("","","DNA1", "", "DNA2", "DNA3", "DNA4")
Gene=c("","","rp1", "", "rp2","rp3", "rp4")

data=data.frame(Type, Description, Gene, stringsAsFactors = FALSE)

R - if row contains x print column header in new column

Try this:

df$C <- apply(df, 1, function(x) paste0(names(df)[as.logical(x)], collapse = ","))

#  A B   C
#1 0 0    
#2 1 1 A,B
#3 1 0   A
#4 0 1   B