One liner wanted: Create data frame and give colnames: R data.frame(..., colnames = c(a, b, c))
Use setNames() around a data.frame:
setNames(data.frame(matrix(c(1,2,3,4),nrow=2,ncol=2)), c("a","b"))
# a b
#1 1 3
#2 2 4
From ?setNames: a convenience function that sets the names on an object and returns the object.
> setNames
function (object = nm, nm)
{
names(object) <- nm
object
}
Create empty dataframes in loop of same size in r and give them colnames of another dataframe?
This base R solution uses an lapply loop to create a list of data.frames, then assigns the list members' names from the names vector and, finally, puts the data.frames in the .GlobalEnv.
df_list <- lapply(stored_df_name, function(x){
setNames(as.data.frame(matrix(nrow=3, ncol=3)), names(df1))
})
names(df_list) <- stored_df_name
list2env(df_list, envir = .GlobalEnv)
rm(df_list)
aa
# name age achivements
#1 NA NA NA
#2 NA NA NA
#3 NA NA NA
If the data.frames need to be empty, run the code above with nrow=0 in the matrix(.) call.
How to define the col names BEFORE making an empty Data Frame?
This is just a workaround, but should work for you:
colnames <- c("A","B")
l <- list()
l[[ colnames[1] ]] <- NA
l[[ colnames[2] ]] <- NA
df <- as.data.frame(l)
df <- df[ -1, ]
> str(df)
[1] A B
<0 rows> (or row.names with length 0)
A shorter version is the following:
colnames <- c("A","B")
d <- c(NA,NA)
names(d) <- colnames
df <- data.frame(rbind(d))[-1,]
Have fun!
Use assign() function to assign column names into an empty data frame in R
The difficulty you're having (how do I refer in later code to an object created with a dynamically generated name) is precisely why assign is a bad idea. Your problem is actually caused by your desire to use assign.
Put the data frame in a list with a non-dynamic variable name, and set the name of that element of the list using the "dynamic" elements:
a <- list(setNames(data.frame(matrix(nrow = 0, ncol = length(col_names))),col_names))
names(a) <- paste0(x,y)
> a
$Sale2015
[1] Q1 Q2 Q3
<0 rows> (or 0-length row.names)
Rename one named column in R
As of October 2014 this can now be done easily in the dplyr package:
rename(data, d = b)
How to put numeric vector into function instead of character in R?
Here is an alternative way to achieve your task:
library(dplyr)
library(broom)
data %>%
mutate(gr = as.character(gr)) %>%
select_if(is.numeric) %>%
map_df(~ tidy(t.test(. ~ gr)), .id = 'var') %>%
select(var, statistic, p.value) %>%
mutate(decision = ifelse(p.value < 0.05, "good", "bad"))
var statistic p.value decision
<chr> <dbl> <dbl> <chr>
1 x -1.87 0.135 bad
2 y -1.87 0.135 bad
3 z -1.87 0.135 bad
Convert a Pandas DataFrame to a dictionary
The to_dict() method sets the column names as dictionary keys so you'll need to reshape your DataFrame slightly. Setting the 'ID' column as the index and then transposing the DataFrame is one way to achieve this.
to_dict() also accepts an 'orient' argument which you'll need in order to output a list of values for each column. Otherwise, a dictionary of the form {index: value} will be returned for each column.
These steps can be done with the following line:
>>> df.set_index('ID').T.to_dict('list')
{'p': [1, 3, 2], 'q': [4, 3, 2], 'r': [4, 0, 9]}
In case a different dictionary format is needed, here are examples of the possible orient arguments. Consider the following simple DataFrame:
>>> df = pd.DataFrame({'a': ['red', 'yellow', 'blue'], 'b': [0.5, 0.25, 0.125]})
>>> df
a b
0 red 0.500
1 yellow 0.250
2 blue 0.125
Then the options are as follows.
dict - the default: column names are keys, values are dictionaries of index:data pairs
>>> df.to_dict('dict')
{'a': {0: 'red', 1: 'yellow', 2: 'blue'},
'b': {0: 0.5, 1: 0.25, 2: 0.125}}
list - keys are column names, values are lists of column data
>>> df.to_dict('list')
{'a': ['red', 'yellow', 'blue'],
'b': [0.5, 0.25, 0.125]}
series - like 'list', but values are Series
>>> df.to_dict('series')
{'a': 0 red
1 yellow
2 blue
Name: a, dtype: object,
'b': 0 0.500
1 0.250
2 0.125
Name: b, dtype: float64}
split - splits columns/data/index as keys with values being column names, data values by row and index labels respectively
>>> df.to_dict('split')
{'columns': ['a', 'b'],
'data': [['red', 0.5], ['yellow', 0.25], ['blue', 0.125]],
'index': [0, 1, 2]}
records - each row becomes a dictionary where key is column name and value is the data in the cell
>>> df.to_dict('records')
[{'a': 'red', 'b': 0.5},
{'a': 'yellow', 'b': 0.25},
{'a': 'blue', 'b': 0.125}]
index - like 'records', but a dictionary of dictionaries with keys as index labels (rather than a list)
>>> df.to_dict('index')
{0: {'a': 'red', 'b': 0.5},
1: {'a': 'yellow', 'b': 0.25},
2: {'a': 'blue', 'b': 0.125}}
How to change dataframe column names in PySpark?
There are many ways to do that:
Option 1. Using selectExpr.
data = sqlContext.createDataFrame([("Alberto", 2), ("Dakota", 2)],
["Name", "askdaosdka"])
data.show()
data.printSchema()
# Output
#+-------+----------+
#| Name|askdaosdka|
#+-------+----------+
#|Alberto| 2|
#| Dakota| 2|
#+-------+----------+
#root
# |-- Name: string (nullable = true)
# |-- askdaosdka: long (nullable = true)
df = data.selectExpr("Name as name", "askdaosdka as age")
df.show()
df.printSchema()
# Output
#+-------+---+
#| name|age|
#+-------+---+
#|Alberto| 2|
#| Dakota| 2|
#+-------+---+
#root
# |-- name: string (nullable = true)
# |-- age: long (nullable = true)
Option 2. Using withColumnRenamed, notice that this method allows you to "overwrite" the same column. For Python3, replace xrange with range.
from functools import reduce
oldColumns = data.schema.names
newColumns = ["name", "age"]
df = reduce(lambda data, idx: data.withColumnRenamed(oldColumns[idx], newColumns[idx]), xrange(len(oldColumns)), data)
df.printSchema()
df.show()
Option 3. Using alias, in Scala you can also use as.
from pyspark.sql.functions import col
data = data.select(col("Name").alias("name"), col("askdaosdka").alias("age"))
data.show()
# Output
#+-------+---+
#| name|age|
#+-------+---+
#|Alberto| 2|
#| Dakota| 2|
#+-------+---+
Option 4. Using sqlContext.sql, which lets you use SQL queries on DataFrames registered as tables.
sqlContext.registerDataFrameAsTable(data, "myTable")
df2 = sqlContext.sql("SELECT Name AS name, askdaosdka as age from myTable")
df2.show()
# Output
#+-------+---+
#| name|age|
#+-------+---+
#|Alberto| 2|
#| Dakota| 2|
#+-------+---+
Related Topics
How to Annotate Ggplot2 Qplot Outside of Legend and Plotarea? (Similar to Mtext())
R Shiny: Plot with Dynamical Size
Ggplot2: How to Rotate a Graph in a Specific Angle
Dist Function with Large Number of Points
How to Force the X-Axis Tick Marks to Appear at the End of Bar in Heatmap Graph
R Function That Uses Its Output as Its Own Input Repeatedly
Combining Rows Based on a Column
Multiplying Combinations of a List of Lists in R
Wordcloud Package: Get "Error in Strwidth(…):Invalid 'Cex' Value"
Removing Unicode Symbols from Column Names
Changing the Order of Dodged Bars in Ggplot2 Barplot
Mass Variable Declaration and Assignment in R
How to Custom or Display Modebar in Plotly
Shiny - Custom Warning/Error Messages
How to Add Rows with 0 Counts to Summarised Output
Importing Multiple .CSV Files with Variable Column Types into R
How to Extend the 'Summary' Function to Include Sd, Kurtosis and Skew