Convert a Dataframe to Presence Absence Matrix

Convert pandas dataframe into a matrix of 1's and 0's for presence/absence

You might want to use pivot_table:

# Flag every observed (new_id, code) row with a 1 so the pivot has a value to aggregate.
df['COUNT'] = 1
# Combinations that never occur come out of the pivot as NaN; fill them with 0 (absence).
presence_absence = df.pivot_table('COUNT', index='new_id', columns="code")
print(presence_absence.fillna(0))

(If you want to keep integers, append `.astype(int)` to the last line: before the missing cells are filled with 0, the pivoted table contains only 1 and NaN values, and the NaN forces the columns to float64.)

Creating a 'presence-absence' matrix from a pandas dataframe


>>> pd.crosstab(df['Site'], df['Species'])
Species Neofelis Panthera
Site
A 0 1
B 1 1
C 0 1
D 1 0

How to transform a dataset into a presence/absence matrix?

Here's a tidy solution:

library(stringr)
library(dplyr)
library(tidyr)

# Example input: one row per year, with a comma-separated list of the species
# observed in that year.
dat <- data.frame(
  species = c("species_1", "species_1, species_2", "species_2, species_3"),
  year = c(2000, 2003, 2005)
)

dat %>%
  # str_split() is vectorised and returns one character vector per row, so the
  # result can be stored directly as a list-column (no rowwise() needed).
  mutate(species = str_split(species, ",")) %>%
  # One row per (year, species) pair.
  unnest(species) %>%
  # Drop the blank left after each comma and mark the pair as present.
  mutate(
    species = trimws(species),
    value = 1
  ) %>%
  # One column per species; pairs that never occur are filled with 0 (absent).
  pivot_wider(
    names_from = "species",
    values_from = "value",
    values_fill = 0
  )
#> # A tibble: 3 × 4
#> year species_1 species_2 species_3
#> <dbl> <dbl> <dbl> <dbl>
#> 1 2000 1 0 0
#> 2 2003 1 1 0
#> 3 2005 0 1 1

Created on 2022-06-30 by the reprex package (v2.0.1)

Convert a dataframe to presence absence matrix

One possibility:

library(reshape2)
# Stack every column except V1 into long format (columns: V1, variable, value).
# The argument is spelled out in full as `id.vars`; the shorter `id.var` only
# works through partial argument matching.
df2 <- melt(df, id.vars = "V1")
# Cross-tabulate V1 against the stacked values: each cell counts how often a
# value occurs for that V1 entry.
with(df2, table(V1, value))

# value
# V1 A B C D E F
# File1 1 1 1 0 0 0
# File2 1 1 0 1 0 0
# File3 0 0 0 0 1 1

Create a presence/absence matrix from two variables of a dataframe but adding the information of one third variable from the df instead of value 1

Try dcast from reshape2

library(reshape2)
# One row per day and one `ind_<id>` column per individual. Each cell holds the
# summed weight; day/individual pairs with no record are filled with NA.
dcast(
  mydf,
  day ~ paste0("ind_", individual),
  fun.aggregate = sum,
  value.var = "weight",
  fill = NA_real_
)
# day ind_1 ind_2 ind_3 ind_4 ind_5 ind_6 ind_7
#1 1 20 18 36 36 41 NA NA
#2 2 25 NA 40 NA 46 30 12

and for 'length'

# Same layout as for weight, but each cell holds the summed `length` value;
# day/individual pairs with no record are filled with an integer NA.
dcast(
  mydf,
  day ~ paste0("ind_", individual),
  fun.aggregate = sum,
  value.var = "length",
  fill = NA_integer_
)
# day ind_1 ind_2 ind_3 ind_4 ind_5 ind_6 ind_7
#1 1 12 23 26 15 56 NA NA
#2 2 16 NA 30 NA 60 30 35

Or using base R

# Contingency table of summed weight, with days as rows and individuals as columns.
xtabs(weight ~ day + individual, data = mydf)

how to create presence/absence data frame with data of columns?

You can extract the unique levels from the data and use sapply with table.

# Every distinct value that occurs anywhere in the data frame, in sorted order.
lvls <- sort(unique(unlist(df)))
# Tabulate each column against the full level set, so that values missing from
# a column are still reported (with a count of 0).
sapply(df, function(column) {
  table(factor(column, levels = lvls))
})

# sp1 sp2 sp3
#e1 1 1 0
#e2 1 0 1
#e3 0 1 1
#e4 1 1 0
#e5 0 0 1

Transforming a presence/absence matrix into a data frame of vertex connections (removing duplicated rows with equal unordered values)

The problem can be solved using the purrr package.

# Reproduce the input: a 7 x 7 presence/absence matrix with named rows (the
# sites) and lettered columns. Note that matrix() fills column by column.
mat <- matrix(
  data = c(1,0,0,0,0,0,0,
           0,1,1,0,1,0,0,
           0,0,1,0,1,0,0,
           1,1,0,0,0,1,0,
           0,0,0,0,0,1,0,
           0,1,0,0,1,0,0,
           1,0,1,1,1,0,0), nrow = 7, ncol = 7)
colnames(mat) <- LETTERS[1:7]
rownames(mat) <- c("Aiz", "Aren", "Atx", "Berr", "Bra", "Bur", "Cab")

# Presence/absence columns, taken from the matrix itself so the code keeps
# working when the number of columns changes.
feature_cols <- colnames(mat)

# Convert to a tibble, carrying the row names along in a `Names` column.
df <- mat %>%
  dplyr::as_tibble() %>%
  dplyr::bind_cols(tibble::tibble(Names = rownames(mat)))

# Calculate the connections: for every unordered pair of sites, the weight is
# the number of columns in which both sites have a non-zero entry.
purrr::map_df(df$Names, function(x) {
  purrr::map_df(df$Names, function(y) {
    # Keep only pairs with x < y, so each pair is reported once and a site is
    # never paired with itself.
    if (x >= y) return(tibble::tibble())
    tibble::tibble(
      siteA = x,
      siteB = y,
      weight = sum(
        as.integer(df[df$Names == x, feature_cols]) &
          as.integer(df[df$Names == y, feature_cols])
      )
    )
  })
})

good luck

Create a presence-absence matrix with presence on specific dates

We can try the code below

library(data.table)

# Convert both data frames to data.tables in place so the join syntax below works.
setDT(df1)
setDT(df2)

# Build a Date x ID presence/absence table.
# Inner expression, read left to right:
#   1. df1[df2, ..., on = .(Start < Date, End > Date)] is a non-equi join: for
#      each Date in df2, keep the IDs whose interval strictly contains it
#      (Start < Date and End > Date).
#   2. [df1, on = .(ID)] joins that result back onto df1 by ID, so an ID with no
#      matching date still appears (with an NA Date) and gets its own column.
# dcast() then spreads the IDs into columns and counts rows per Date/ID cell
# with length(). na.omit() drops the row whose Date is NA.
# NOTE(review): cells can exceed 1 if df1 holds several overlapping intervals
# for the same ID - confirm against the real data.
na.omit(
dcast(
df1[df2, .(Date, ID), on = .(Start < Date, End > Date)][df1, on = .(ID)],
Date ~ ID,
fun.aggregate = length
)
)

which gives

         Date Afr Ahe Art
1: 2015-07-01 1 0 0
2: 2015-07-02 1 0 1
3: 2015-07-03 1 0 1

Data

> dput(df1)
structure(list(ID = c("Afr", "Ahe", "Art"), Start = structure(c(16615,
17153, 16617), class = "Date"), End = structure(c(16847, 17586,
18382), class = "Date")), class = "data.frame", row.names = c(NA,
-3L))

> dput(df2)
structure(list(Date = structure(c(16617, 16618, 16619), class = "Date")), class = "data.frame", row.names = c(NA,
-3L))


Related Topics



Leave a reply



Submit