Using grep in R to delete rows from a data.frame
You can use TRUE/FALSE subsetting instead of numeric.
grepl
is like grep, but it returns a logical
vector. Negation works with it.
d[!grepl("K",d$z),]
x y z
1 1 1 apple
2 1 2 pear
3 1 3 banana
4 1 4 A
5 1 5 B
6 1 6 C
7 1 7 D
8 1 8 E
9 1 9 F
10 1 10 G
Delete rows in text file with grep (?) in R
`myfun` takes the text file as input and returns a list of data frames (one per ROI). The `what`
argument sets whether to extract the histogram data (`'Histogram'`) or the basic statistics (`'Basic Stats'`) from the text file.
# Parse an ROI statistics text file into one data frame per ROI block.
#
# The file is scanned for lines containing "ROI:". Each such line starts a
# block that runs up to the line before the next "ROI:" line, or to the end of
# the file for the last block. Within a block, the first line containing `what`
# marks the start of the tab-separated table that is extracted.
#
# Arguments:
#   file  Passed to readLines().
#   what  Either 'Basic Stats' or 'Histogram'.
#
# Returns:
#   A list of data frames named "1", "2", ... in the order the ROI blocks
#   appear. Every data frame ends with the columns ROI, color and points,
#   taken from the ROI header line. For 'Histogram' the columns Band and Bin
#   are added and every column except ROI and color is numeric; for
#   'Basic Stats' columns 2 to 5 are numeric.
#
# Errors:
#   Stops when `what` is not one of the two supported values, or when an ROI
#   block contains no `what` section.
myfun <- function(file, what) {
  x <- readLines(file)
  roi_lines <- grep("ROI:", x)

  if (length(what) != 1 || !what %in% c("Basic Stats", "Histogram")) {
    stop('what value is not supported')
  }
  # Both supported values are plain text, so a fixed match is equivalent.
  section_lines <- grep(what, x, fixed = TRUE)

  to_numeric <- function(v) as.numeric(as.character(v))

  n_roi <- length(roi_lines)
  df_list <- list()

  for (counter in seq_len(n_roi)) {
    low <- roi_lines[counter]
    # `high` is the first line that is NOT part of this block. For the last
    # block that is one past the end of the file, so the final line is kept.
    high <- if (counter < n_roi) roi_lines[counter + 1] else length(x) + 1

    in_block <- section_lines[section_lines > low & section_lines < high]
    if (length(in_block) == 0) {
      stop("no '", what, "' section found in the ROI block starting at line ",
           low)
    }
    min_ind <- min(in_block)

    # ROI name, colour and point count from the header line, brackets removed.
    # NOTE(review): the token positions differ between the first ROI line and
    # the following ones (2:4 vs 4:6); this mirrors the original answer and
    # presumably reflects the layout of the input file - confirm against data.
    tokens <- strsplit(x[low], " ")[[1]]
    title <- gsub("\\[|\\]", "", tokens[if (counter == 1) 2:4 else 4:6])

    if (what == "Basic Stats") {
      # Header line plus the following five lines; blank lines yield
      # zero-length splits, which rbind() ignores.
      tab <- do.call(rbind, strsplit(x[min_ind:(min_ind + 5)], "\t"))
      x1 <- data.frame(tab, stringsAsFactors = FALSE)
      colnames(x1) <- tab[1, ]
      x1 <- x1[2:5, ]
      colnames(x1) <- gsub(" ", "", colnames(x1)) # remove spaces
      x1[c("ROI", "color", "points")] <- as.list(title)
      # convert from character to numeric data type
      x1[2:5] <- lapply(x1[2:5], to_numeric)
    } else {
      tab <- do.call(rbind, strsplit(x[min_ind:(high - 1)], "\t"))
      x1 <- data.frame(tab, stringsAsFactors = FALSE)
      colnames(x1) <- tab[1, ]
      colnames(x1)[1] <- "Histogram"

      # Every repeated header row starts a new band; spread that band's
      # "Band" and "Bin" labels over all rows up to the next header row.
      is_header <- grepl("Histogram", x1$Histogram)
      group_sizes <- diff(c(which(is_header), nrow(x1) + 1))
      x1$Band <- rep(gsub("[Band ]", "",
                          grep("Band", x1$Histogram, value = TRUE)),
                     group_sizes)
      x1$Bin <- rep(gsub("[Bin= ]", "",
                         grep("Bin", x1$Histogram, value = TRUE)),
                    group_sizes)

      x1 <- x1[!is_header, ]
      x1$Histogram <- NULL
      colnames(x1) <- gsub(" ", "", colnames(x1)) # remove spaces
      x1[c("ROI", "color", "points")] <- as.list(title)
      # convert everything except ROI and color from character to numeric
      num_cols <- c(seq_len(ncol(x1) - 3), ncol(x1))
      x1[num_cols] <- lapply(x1[num_cols], to_numeric)
    }

    df_list[[as.character(counter)]] <- x1
  }

  df_list
}
1. Extract Basic Statistics:
df_list <- myfun(file = "test2.txt", what = 'Basic Stats')
df_list[[1]]
# BasicStats Min Max Mean Stdev ROI color points
# 2 Band 1 0.013282 0.133982 0.061581 0.034069 red_1 Red 20
# 3 Band 2 0.009866 0.112935 0.042688 0.026618 red_1 Red 20
# 4 Band 3 0.008304 0.037059 0.018434 0.007515 red_1 Red 20
# 5 Band 4 0.004726 0.040089 0.018490 0.009605 red_1 Red 20
df_list[[2]]
# BasicStats Min Max Mean Stdev ROI color points
# 2 Band 1 0.032262 0.124425 0.078073 0.028031 red_2 Red 12
# 3 Band 2 0.021072 0.064156 0.037923 0.012178 red_2 Red 12
# 4 Band 3 0.013404 0.066043 0.036316 0.014787 red_2 Red 12
# 5 Band 4 0.005162 0.055781 0.015526 0.013255 red_2 Red 12
df_list[[3]]
# BasicStats Min Max Mean Stdev ROI color points
# 2 Band 1 0.037488 0.107830 0.057892 0.018964 red_3 Red 12
# 3 Band 2 0.028140 0.072370 0.045340 0.014507 red_3 Red 12
# 4 Band 3 0.014960 0.112973 0.032751 0.026575 red_3 Red 12
# 5 Band 4 0.006566 0.029133 0.018201 0.006897 red_3 Red 12
2. Extract Data:
df_list <- myfun(file = "test2.txt", what = 'Histogram')
head
head(df_list[[1]])
# DN Npts Total Percent AccPct Band Bin ROI color points
# 2 0.013282 1 1 5 5 1 0.00047 red_1 Red 20
# 3 0.013755 0 1 0 5 1 0.00047 red_1 Red 20
# 4 0.014228 0 1 0 5 1 0.00047 red_1 Red 20
# 5 0.014702 0 1 0 5 1 0.00047 red_1 Red 20
# 6 0.015175 0 1 0 5 1 0.00047 red_1 Red 20
# 7 0.015648 0 1 0 5 1 0.00047 red_1 Red 20
summary statistics:
library('data.table')
df1 <- df_list[[1]]
setDT(df1)[, .( Min = min( DN ),
Max = max( DN ),
Mean = mean( DN ),
Stdev = sd( DN ) ), by = 'Band']
# Band Min Max Mean Stdev
# 1: 1 0.013282 0.133982 0.07363182 0.035048124
# 2: 2 0.009866 0.112935 0.06140034 0.029928470
# 3: 3 0.008304 0.037059 0.02268180 0.008349628
# 4: 4 0.004726 0.040089 0.02240761 0.010268456
df2 <- df_list[[2]]
setDT(df2)[, .( Min = min( DN ),
Max = max( DN ),
Mean = mean( DN ),
Stdev = sd( DN ) ), by = 'Band']
# Band Min Max Mean Stdev
# 1: 1 0.032262 0.124425 0.07834352 0.02676153
# 2: 2 0.021072 0.064156 0.04261389 0.01251049
# 3: 3 0.013404 0.066043 0.03972310 0.01528497
# 4: 4 0.005162 0.055781 0.03047151 0.01469855
Removing rows surrounding a grepl pattern match in R
Using grep
you can get the row number where you find a pattern. Increment the row number by 1 and remove both the rows.
# Row numbers whose V1 matches the pattern.
inds <- grep("my_string", df$V1)
# Drop every matching row and the row right after it. A logical mask is used
# instead of negative indices because df[-integer(0), ] returns zero rows when
# nothing matches.
drop_rows <- seq_len(nrow(df)) %in% c(inds, inds + 1)
result <- df[!drop_rows, ]
Using tidyverse
-
library(dplyr)
library(stringr)

result <- df %>%
  filter({
    # str_detect() takes the string vector first and the pattern second.
    inds <- str_detect(V1, "my_string")
    # Drop each matching row and the row directly after it.
    !(inds | lag(inds, default = FALSE))
  })
Using grep or dplyr to conditionally remove rows and replace others?
Try this (using the data you provided) and now updated:
library(tidyverse)
# Data
df_rep <- data.frame(IDD, Valve, Seconds, stringsAsFactors = FALSE)
# Replace every cell holding the literal string 'NA' with zero
df_rep[df_rep == 'NA'] <- 0
# Code: within each IDD, drop the second row when its Seconds value is zero
df_rep %>%
  group_by(IDD) %>%
  mutate(key = row_number(),
         Flag = ifelse(key == 2 & Seconds == 0, 1, 0)) %>%
  filter(Flag != 1) %>%
  ungroup() %>%
  select(-c(key, Flag))
Which produces:
# A tibble: 4 x 3
IDD Valve Seconds
<chr> <chr> <chr>
1 999674642 1 0
2 999269097 1 0
3 998496846 0 12
4 998067840 0 5
Delete rows containing specific strings in R
This should do the trick:
df[- grep("REVERSE", df$Name),]
Or a safer version (it still works when nothing matches, whereas the negative-index form would then return zero rows) would be:
df[!grepl("REVERSE", df$Name),]
Delete rows with grep() and lapply with data.table
DT[mapply( grepl, id1, id2), ]
# id id1 id2
# 1: 52 3505H6 3505H6856
# 2: 52 3505H6 3505H6856
# 3: 52 3505H6 3505H6856
# 4: 54 3505H6 3505H67158
# 5: 54 3505H6 3505H67158
# 6: 84 3505H6 3505H63188
# 7: 84 3505H6 3505H63188
# 8: 129 3505H6 3505H664133
# 9: 129 3505H6 3505H664133
# 10: 130 3505H6 3505H658134
# 11: 130 3505H6 3505H658134
# 12: 130 3505H6 3505H658134
Grep in R to remove entire row if city column cell is blank
Maybe
subset(mydata,city!="")
? This assumes that the city
column is stored in such a way that the blanks are zero-length strings. If they might be whitespace, then something like
grep("^[[:space:]]*$",mydata$city,invert=TRUE)
would find the elements you wanted. Since grepl
doesn't have an invert
argument you could use (edit: thanks to @JoshO'Brien)
subset(mydata,!grepl("^[[:space:]]*$",city))
(No reproducible example given, so neither of these is tested.)
Remove rows containing specific strings
- We can first `grep` the indices of the rows that contain one of the `remove_list` words, then exclude them from your data.frame
# Split the "|"-separated word list and collect the row numbers that contain
# any of the words (literal match, not regex).
remove_ind <- lapply(strsplit(remove_list, "\\|")[[1]],
                     \(x) grep(x, PKV$Aufzeichnungen, fixed = TRUE)) |>
  unlist() |> unique()
#> [1] 12 15 10 13
# Exclude those rows. A logical mask keeps all rows when nothing matched,
# whereas PKV[-integer(0), ] would return none.
PKV[!(seq_len(nrow(PKV)) %in% remove_ind), ]
- output
ID Aufzeichnungen
1 1 Aufzeichnungen
2 1 07.03.22 A: stechender Schmerz
3 1 scharfkantig
4 1 D/B:
5 1 T:
6 1 pat aht an 36 üz distal
7 1 seit paartagen
8 1 36 vipr++
9 1 perk-
11 1 üz bilfuird
14 1 pat
16 1
17 1 pat knirscht
18 1 schiene empohlen
19 1 pat meldet sich..
Related Topics
Floor a Year to the Decade in R
Switch R Script from Non-Interactive to Interactive
Change the Color of the Axis Labels
Conditional 'Echo' (Or Eval or Include) in Rmarkdown Chunks
Difference Between Pull and Select in Dplyr
Filtering Data Frame Based on Na on Multiple Columns
Convert Scientific Notation to Numeric, Preserving Decimals
Can't Open Sockets for Parallel Cluster
Modify Variable Within R Function
Order and Color of Bars in Ggplot2 Barplot
Order of Legend Entries in Ggplot2 Barplots with Coord_Flip()
Using Grep in R to Delete Rows from a Data.Frame
R:Ggplot2:Facet_Grid:How Include Math Expressions in Few (Not All) Labels
Error When I Try to Predict Class Probabilities in R - Caret
How to Write from R to the Clipboard on a MAC