Print a Data Frame with Columns Aligned (As Displayed in R)

Print a data frame with columns aligned (as displayed in R)

You could redirect the output of print to file.

max.print <- getOption('max.print')
options(max.print=nrow(dframe) * ncol(dframe))
sink('dframe.txt')
dframe
sink()
options(max.print=max.print)

Print data frame with columns center-aligned

The problem is that in order for this to work as you expect, the "width" argument needs to also be specified.

Here's an example:

test.1 <- data.frame(Variable.1 = as.character(c(1,2,3)), 
                     Variable.2 = as.character(c(5,6,7)))

# Identify the width of the widest column by column name
name.width <- max(sapply(names(test.1), nchar))
format(test.1, width = name.width, justify = "centre")
#   Variable.1 Variable.2
# 1     1          5     
# 2     2          6     
# 3     3          7

But, how does this approach work with columns where the variable names are different lengths? Not so well.

test.2 <- data.frame(A.Really.Long.Variable.Name = as.character(c(1,2,3)), 
                     Short.Name = as.character(c(5,6,7)))

name.width <- max(sapply(names(test.2), nchar))
format(test.2, width = name.width, justify = "centre")
#   A.Really.Long.Variable.Name                  Short.Name
# 1              1                           5             
# 2              2                           6             
# 3              3                           7

There is, of course, a workaround: change the "width" of each variable name to be equal lengths by padding them with spaces (using format())

orig.names <- names(test.2) # in case you want to restore the original names
names(test.2) <- format(names(test.2), width = name.width, justify = "centre")
format(test.2, width = name.width, justify = "centre")
#   A.Really.Long.Variable.Name         Short.Name         
# 1              1                           5             
# 2              2                           6             
# 3              3                           7

Outputting an R dataframe to a .txt file - Align positive and negative values

Maybe try a finer scale treatment of the printing using sprintf and a different format string for positive and negative numbers, e.g.:

> df = data.frame(x=c('PICALM','Luc','SEC22B'),y=c(-2.261085123,-2.235376098,2.227728912))

> sprintf('%15-s%.6f',df$x[1],df$y[1])
[1] "PICALM         -2.261085"

> sprintf('%15-s%.6f',df$x[2],df$y[2])
[1] "Luc            -2.235376"

> sprintf('%15-s%.7f',df$x[3],df$y[3])
[1] "SEC22B         2.2277289"

EDIT:
I don't think that write.table or similar functions accept custom format strings, so one option could be to create a data frame of formatted strings and the use write.table or writeLines to write to a file, e.g.

dfstr = data.frame(x=sprintf('%15-s', df$x),
                   y=sprintf(paste0('%.', 7-1*(df$y<0),'f'), df$y))

(The format string for y here is essentially what I previously proposed.) Next, write dfstr directly:

write.table(x=dfstr,file='filename.txt',
            quote=F,row.names=F,col.names=F)

Does format(..., justify = left) on a dataframe also left-justify the column names?

# print object explicitly
print(x,right=F)

How to print (to paper) a nicely-formatted data frame

Here is a quick and easy possibility using grid.table from the gridExtra package:

library(gridExtra)
pdf("data_output.pdf", height=11, width=8.5)
grid.table(mtcars)
dev.off()

Sample Image

If your data doesn't fit on the page, you can reduce the text size grid.table(mtcars, gp=gpar(fontsize=8)). This may not be very flexible, nor easy to generalize or automate.

Align strings of a dataframe in columns in r

Tested with four examples, but this version was done without regard for the information you added as a workaround in example 4.

The main addition is shuffle logic (which may be quite slow) to compact the resulting dataframe form right to left. It's possible that the assigned_by_suffix and the assigned_by_single_suffix are no longer required, but I have not verified.

Outputs are at the end of the code

# examples
df1 <- read.table(text="
col1         col2        col3 
st1-ab     stb-spst    sta-spst
stc-spst     sta-spst      st4-ab   
stb-spst       st7-ab
st9-ba     stb-spst",header=TRUE,fill=TRUE,stringsAsFactors=FALSE)

df2 <- read.table(text="
col1         col2        col3       col4 
st1-ab       stb-spst    sta-spst   std-spst
stc-spst     sta-spst    st4-ab     st2-ab
stb-spst     st7-ab      sa-ac
st9-ba       stb-spst",header=TRUE,fill=TRUE,stringsAsFactors=FALSE)

df3 <- read.table(text="
col1         col2        col3       col4 
st1-ab       stb-spst    sta-spst   std-spst
stb-spst     sta-ab    
sta-spst     st7-ab      sa-ac
sta-spst     stb-spst",header=TRUE,fill=TRUE,stringsAsFactors=FALSE)

df4 <- read.table(text="
col1         col2        col3       col4 
st1-ab       stb-spst    sta-spst   std-spst
stb-spst     st1-ab    
sta-spst     st7-ab      sa-ac
sta-spst     stb-spst    st7-ab",header=TRUE,fill=TRUE,stringsAsFactors=FALSE)

library(reshape2)
library(tidyr)
library(dplyr)
library(stringr)
library(assertthat)

suffix <- function(s) {str_extract(s, "[^\\-]+$")}

# make a tall dataframe with melt, and get the suffix
dfm <- df4 %>% 
  mutate(row_id = seq_along(col1)) %>%
  melt(id.vars="row_id") %>%
  select(-2) %>%
  filter(value != "") %>%
  mutate(suffix = suffix(value)) %>%
  arrange(value)
assert_that(!any(duplicated(dfm[c("row_id", "value")])))

# initialize 
combined <- data.frame()
remaining <- dfm

# get the groups with more than 1 value
matched_values  <- dfm %>%
  group_by(value, suffix) %>%
  summarize(n=n()) %>%
  filter(n>1) %>%
  rename(group_id = value) %>%
  ungroup()

# .. and assign the group ids that match
assigned_by_value <- remaining %>% 
  inner_join(matched_values %>% select(group_id), by = c("value" = "group_id")) %>%
  mutate(group_id = value) %>%
  select(row_id, value, suffix, group_id)
combined <- combined %>% bind_rows(assigned_by_value)
remaining <- dfm %>% anti_join(combined, by=c("row_id", "value"))
# find the remaining suffixes 
matched_suffixes <- remaining  %>%
  group_by(suffix) %>%
  summarize(n=n()) %>%
  filter(n>1) %>%
  select(-n) %>%
  ungroup()

# ... and assign those that match
assigned_by_suffix <- remaining %>%
  inner_join(matched_suffixes, by="suffix") %>%
  mutate(group_id = suffix)
combined <- bind_rows(combined, assigned_by_suffix)
remaining <- remaining %>% anti_join(combined, by=c("row_id", "value"))

# All that remain are singles assign matches by suffix, choosing the match with fewest
assigned_by_single_suffix <- remaining %>%
  inner_join(matched_values, by = "suffix") %>%
  top_n(1, n) %>%
  head(1) %>%
  select(-n)
combined <- bind_rows(combined, assigned_by_single_suffix)
remaining <- remaining %>% anti_join(combined, by=c("row_id", "value"))

# get the remaining unmatched
unmatched <- remaining%>%
  mutate(group_id = value)
combined <- bind_rows(combined, unmatched)
remaining <- remaining %>% anti_join(combined, by=c("row_id", "value"))
assert_that(nrow(remaining) == 0)

# any overloads (duplicates) need to bump to their own column
dups <- duplicated(combined[,c("row_id", "group_id")])
combined$group_id[dups] <- combined$value[dups]

assert_that(nrow(combined) == nrow(dfm))

# spread the result

result <- spread(combined %>% select(-suffix), group_id, value, fill ="")

# Shuffle any matching suffix from right to left, so l long as there
# is corresponding space an that the whole column can move
# i is source (startign from right) - j is target (starting from right) 
#
drop_cols = c()
suffixes <- suffix(names(result))
for (i in (ncol(result)):3) {
  for(j in (i-1):2) {
    if (suffixes[i] == suffixes[j]) {
      non_empty <- which(result[,i] != "") # list of source to move
      can_fill  <- which(result[,j] == "") # list of targets can be filled
      can_move   <- all(non_empty %in% can_fill) # is to move a subset of can_fill? 

      # if there's space, shuffle the column down
      if (can_move ) {
        # shuffle down
        result[,j] <- if_else(result[,j] != "", result[,j], result[,i]) 
        drop_cols <- c(drop_cols, i)
        result[,i] <- NA
        break
      }
    }                   
  }
}

if (!is.null(drop_cols)) {
  result <- result[,-drop_cols]
}
result

# Example 1
#   row_id     ab st9-ba sta-spst stb-spst
# 1      1 st1-ab        sta-spst stb-spst
# 2      2 st4-ab        sta-spst stc-spst
# 3      3 st7-ab                 stb-spst
# 4      4        st9-ba          stb-spst

# Example 2
#  row_id     ab sa-ac     spst st2-ab st9-ba sta-spst stb-spst
# 1      1 st1-ab       std-spst               sta-spst stb-spst
# 2      2 st4-ab       stc-spst st2-ab        sta-spst         
# 3      3 st7-ab sa-ac                                 stb-spst
# 4      4                              st9-ba          stb-spst 

# Example 3
#   row_id     ab sa-ac sta-spst stb-spst std-spst
# 1      1 st1-ab       sta-spst stb-spst std-spst
# 2      2 sta-ab                stb-spst         
# 3      3 st7-ab sa-ac sta-spst                  
# 4      4              sta-spst stb-spst   

# Example 4
#   row_id sa-ac st1-ab sta-spst stb-spst std-spst
# 1      1       st1-ab sta-spst stb-spst std-spst
# 2      2       st1-ab          stb-spst         
# 3      3 sa-ac st7-ab sta-spst                  
# 4      4       st7-ab sta-spst stb-spst         
>

Print a Data Frame with Columns Aligned (As Displayed in R)