Convert pandas dataframe into a matrix of 1's and 0's for presence/absence
You might want to use pivot_table:
df['COUNT'] = 1
print(df.pivot_table('COUNT', index='new_id', columns="code").fillna(0))
(Note that if you want to keep integers, you will have to append '.astype(int)' to the last line: before the missing entries are filled with 0, the table contains only "1" and "NaN" values, and these are stored as float64 because of the "NaN".)
Creating a 'presence-absence' matrix from a pandas dataframe
>>> pd.crosstab(df['Site'], df['Species'])
Species Neofelis Panthera
Site
A 0 1
B 1 1
C 0 1
D 1 0
How to transform a dataset into a presence/absence matrix?
Here's a tidy solution:
library(stringr)
library(dplyr)
library(tidyr)
dat <- data.frame(
species = c("species_1", "species_1, species_2", "species_2, species_3"),
year = c(2000, 2003, 2005)
)
library(stringr)
dat %>%
rowwise() %>%
mutate(species = list(str_split(species, ",")[[1]])) %>%
unnest(species) %>%
mutate(species = trimws(species),
value=1) %>%
pivot_wider(names_from="species", values_fill = 0)
#> # A tibble: 3 × 4
#> year species_1 species_2 species_3
#> <dbl> <dbl> <dbl> <dbl>
#> 1 2000 1 0 0
#> 2 2003 1 1 0
#> 3 2005 0 1 1
Created on 2022-06-30 by the reprex package (v2.0.1)
Convert a dataframe to presence absence matrix
One possibility:
library(reshape2)
df2 <- melt(df, id.var = "V1")
with(df2, table(V1, value))
# value
# V1 A B C D E F
# File1 1 1 1 0 0 0
# File2 1 1 0 1 0 0
# File3 0 0 0 0 1 1
Create a presence/absence matrix from two variables of a dataframe but adding the information of one third variable from the df instead of value 1
Try dcast from reshape2:
library(reshape2)
dcast(mydf, day~paste0('ind_', individual),
value.var='weight', sum, fill=NA_real_)
# day ind_1 ind_2 ind_3 ind_4 ind_5 ind_6 ind_7
#1 1 20 18 36 36 41 NA NA
#2 2 25 NA 40 NA 46 30 12
and for 'length'
dcast(mydf, day~paste0('ind_', individual),
value.var='length', sum, fill=NA_integer_)
# day ind_1 ind_2 ind_3 ind_4 ind_5 ind_6 ind_7
#1 1 12 23 26 15 56 NA NA
#2 2 16 NA 30 NA 60 30 35
Or using base R
xtabs(weight~day+individual, mydf)
how to create presence/absence data frame with data of columns?
You can extract the unique levels from the data and use sapply with table.
lvls <- sort(unique(unlist(df)))
sapply(df, function(x) table(factor(x, lvls)))
# sp1 sp2 sp3
#e1 1 1 0
#e2 1 0 1
#e3 0 1 1
#e4 1 1 0
#e5 0 0 1
Transforming matrix of presence/absence to Data.frame of vertex connections. (Removing duplicated rows with equal unordered values)
The problem can be solved using the purrr package.
# reproduce input
mat <- matrix(
data = c(1,0,0,0,0,0,0,
0,1,1,0,1,0,0,
0,0,1,0,1,0,0,
1,1,0,0,0,1,0,
0,0,0,0,0,1,0,
0,1,0,0,1,0,0,
1,0,1,1,1,0,0), nrow = 7, ncol = 7)
colnames(mat) <- LETTERS[1:7]
rownames(mat) <- c("Aiz", "Aren", "Atx", "Berr", "Bra", "Bur", "Cab")
# convert to dataframe
df <- mat %>%
dplyr::as_tibble() %>%
dplyr::bind_cols(
tibble::tibble(Names = rownames(mat)))
# calculate the connections
purrr::map_df(df$Names, function(x){
output <-purrr::map_df(df$Names, function(y){
if(x >= y) return(tibble::tibble()) # avoid double counting
tibble::tibble(
siteA = x,
siteB = y,
weight = sum(as.integer(df[df$Names==x,1:7]) & as.integer(df[df$Names==y,1:7])))
})
})
good luck
Create a presence-absence matrix with presence on specific dates
We can try the code below
library(data.table)
setDT(df1)
setDT(df2)
na.omit(
dcast(
df1[df2, .(Date, ID), on = .(Start < Date, End > Date)][df1, on = .(ID)],
Date ~ ID,
fun.aggregate = length
)
)
which gives
Date Afr Ahe Art
1: 2015-07-01 1 0 0
2: 2015-07-02 1 0 1
3: 2015-07-03 1 0 1
Data
> dput(df1)
structure(list(ID = c("Afr", "Ahe", "Art"), Start = structure(c(16615,
17153, 16617), class = "Date"), End = structure(c(16847, 17586,
18382), class = "Date")), class = "data.frame", row.names = c(NA,
-3L))
> dput(df2)
structure(list(Date = structure(c(16617, 16618, 16619), class = "Date")), class = "data.frame", row.names = c(NA,
-3L))
Related Topics
Calculate Cumsum() While Ignoring Na Values
Pass Function Arguments to Both Dplyr and Ggplot
Why Is Using '<<-' Frowned Upon and How to Avoid It
'Embedded Nul in String' Error When Importing CSV with Fread
Plot One Numeric Variable Against N Numeric Variables in N Plots
Filling Area Under Curve Based on Value
Display Weighted Mean by Group in the Data.Frame
R - How to Get Row & Column Subscripts of Matched Elements from a Distance Matrix
Finding Out Which Functions Are Called Within a Given Function
R Function with No Return Value
Sorting Each Row of a Data Frame
How to Create Two Independent Drill Down Plot Using Highcharter
What Leads the First Element of a Printed List to Be Enclosed with Backticks in R V3.5.1
What Is the Most Useful R Trick
Create a Matrix of Scatterplots (Pairs() Equivalent) in Ggplot2
Understanding Dates and Plotting a Histogram with Ggplot2 in R