Convert a Dataframe to Presence Absence Matrix

Convert pandas dataframe into a matrix of 1's and 0's for presence/absence

You might want to use pivot_table:

# Tag every observed (new_id, code) row with a constant marker value.
df['COUNT'] = 1
# Spread the marker into a new_id x code table; combinations that never
# occur come out as NaN and are then replaced with 0.
presence = df.pivot_table(values='COUNT', index='new_id', columns='code')
print(presence.fillna(0))

(Note: if you want integer output, append `.astype(int)` to the last line. Before the missing cells are filled with 0, the table contains only 1 and NaN values, and the presence of NaN forces the columns to float64.)

Creating a 'presence-absence' matrix from a pandas dataframe

>>> pd.crosstab(df['Site'], df['Species'])
Species  Neofelis  Panthera
Site
A               0         1
B               1         1
C               0         1
D               1         0

How to transform a dataset into a presence/absence matrix?

Here's a tidy solution:

library(stringr)
library(dplyr)
library(tidyr)

# Example input: one row per year, with a comma-separated list of species.
dat <- data.frame(
  species = c("species_1", "species_1, species_2", "species_2, species_3"),
  year = c(2000, 2003, 2005)
)

dat %>%
  # Work on a tibble so the printed result is a tibble as well.
  as_tibble() %>%
  # str_split() is vectorised and returns one character vector per row,
  # so no rowwise() step is needed before unnesting.
  mutate(species = str_split(species, ",")) %>%
  unnest(species) %>%
  mutate(species = str_trim(species), value = 1) %>%
  # A species listed twice for the same year would otherwise give
  # pivot_wider() several values for one cell; keep each pair once.
  distinct() %>%
  pivot_wider(names_from = species, values_from = value, values_fill = 0)
#> # A tibble: 3 × 4
#>    year species_1 species_2 species_3
#>   <dbl>     <dbl>     <dbl>     <dbl>
#> 1  2000         1         0         0
#> 2  2003         1         1         0
#> 3  2005         0         1         1

Created on 2022-06-30 by the reprex package (v2.0.1)

Convert a dataframe to presence absence matrix

One possibility:

library(reshape2)
# Stack every column except the identifier `V1` into long form; the stacked
# entries end up in the `value` column. The argument is spelled out in full
# (`id.vars`) instead of relying on partial argument matching.
df2 <- melt(df, id.vars = "V1")
# Cross-tabulate identifier against entry: each cell counts how often the
# entry occurs for that identifier.
with(df2, table(V1, value))

#         value
# V1      A B C D E F
#   File1 1 1 1 0 0 0
#   File2 1 1 0 1 0 0
#   File3 0 0 0 0 1 1

Create a presence/absence matrix from two variables of a dataframe but adding the information of one third variable from the df instead of value 1

Try dcast from reshape2

library(reshape2)
# One row per day, one `ind_<individual>` column per individual, holding the
# summed weight; day/individual combinations with no rows are filled with NA.
dcast(
  mydf,
  day ~ paste0("ind_", individual),
  fun.aggregate = sum,
  value.var = "weight",
  fill = NA_real_
)
#  day ind_1 ind_2 ind_3 ind_4 ind_5 ind_6 ind_7
#1   1    20    18    36    36    41    NA    NA
#2   2    25    NA    40    NA    46    30    12

and for 'length'

# Same layout as for weight, but spreading the `length` column; combinations
# with no rows are filled with an integer NA.
dcast(
  mydf,
  day ~ paste0("ind_", individual),
  fun.aggregate = sum,
  value.var = "length",
  fill = NA_integer_
)
#  day ind_1 ind_2 ind_3 ind_4 ind_5 ind_6 ind_7
#1   1    12    23    26    15    56    NA    NA
#2   2    16    NA    30    NA    60    30    35

Or using base R

# Base R alternative: sums `weight` for every day x individual combination.
# Combinations with no rows appear as 0 here rather than NA.
xtabs(weight ~ day + individual, data = mydf)

how to create presence/absence data frame with data of columns?

You can extract the unique levels from the data and use sapply with table.

# Every distinct value found anywhere in the data frame, in sorted order;
# these become the rows of the result.
lvls <- sort(unique(unlist(df)))
# For each column, count how often each level occurs. vapply() with an
# integer template guarantees an integer matrix with one row per level and
# one column per input column, whereas sapply() may change its return shape
# depending on the input.
vapply(
  df,
  function(x) table(factor(x, lvls)),
  FUN.VALUE = integer(length(lvls))
)

#   sp1 sp2 sp3
#e1   1   1   0
#e2   1   0   1
#e3   0   1   1
#e4   1   1   0
#e5   0   0   1

Transforming a presence/absence matrix into a data.frame of vertex connections (removing duplicated rows with equal unordered values)

The problem can be solved using the purrr package.

# Reproduce the input: a 7 x 7 presence/absence matrix with one row per site.
# NOTE(review): matrix() fills column by column by default, so each line of
# the literal below becomes a COLUMN of `mat`. If the lines were meant to be
# rows, add `byrow = TRUE` -- confirm against the original data.
mat <- matrix(
  data = c(1,0,0,0,0,0,0,
           0,1,1,0,1,0,0,
           0,0,1,0,1,0,0,
           1,1,0,0,0,1,0,
           0,0,0,0,0,1,0,
           0,1,0,0,1,0,0,
           1,0,1,1,1,0,0), nrow = 7, ncol = 7)
colnames(mat) <- LETTERS[1:7]
rownames(mat) <- c("Aiz", "Aren", "Atx", "Berr", "Bra", "Bur", "Cab")

# Convert to a tibble, keeping the site names in a `Names` column.
# Written with explicit namespaced calls so that no package has to be
# attached just to get the `%>%` pipe.
df <- dplyr::bind_cols(
  dplyr::as_tibble(mat),
  tibble::tibble(Names = rownames(mat))
)

# Calculate the connections: for every unordered pair of sites, `weight` is
# the number of columns in which both sites have a non-zero entry.
site_names <- rownames(mat)
purrr::map_df(site_names, function(x) {
  purrr::map_df(site_names, function(y) {
    # Keep only pairs with x < y so each pair is reported once and a site
    # is never paired with itself.
    if (x >= y) {
      return(tibble::tibble())
    }
    tibble::tibble(
      siteA = x,
      siteB = y,
      # Rows are read straight from the matrix by name, so no column range
      # has to be hard-coded.
      weight = sum(mat[x, ] & mat[y, ])
    )
  })
})

good luck

Create a presence-absence matrix with presence on specific dates

We can try the code below

library(data.table)

# Convert both data frames to data.tables by reference so the join syntax
# below is available.
setDT(df1)
setDT(df2)

# Build a Date x ID table counting, for each date in df2, the IDs whose
# [Start, End] interval contains that date.
na.omit(
  dcast(
    # Non-equi join: for every Date in df2, pick the df1 rows with
    # Start < Date and End > Date (strict, so the Start and End days
    # themselves do not match). Joining the result back onto df1 by ID keeps
    # IDs that matched no date; they carry an NA Date.
    df1[df2, .(Date, ID), on = .(Start < Date, End > Date)][df1, on = .(ID)],
    Date ~ ID,
    # Count the rows falling into each Date/ID cell.
    fun.aggregate = length
  )
)
# na.omit() then drops the row belonging to the NA Date, leaving never-matched
# IDs as all-zero columns.

which gives

         Date Afr Ahe Art
1: 2015-07-01   1   0   0
2: 2015-07-02   1   0   1
3: 2015-07-03   1   0   1

Data

> dput(df1)
structure(list(ID = c("Afr", "Ahe", "Art"), Start = structure(c(16615, 
17153, 16617), class = "Date"), End = structure(c(16847, 17586,
18382), class = "Date")), class = "data.frame", row.names = c(NA,
-3L))

> dput(df2)
structure(list(Date = structure(c(16617, 16618, 16619), class = "Date")), class = "data.frame", row.names = c(NA,
-3L))