Calculating all distances between one point and a group of points efficiently in R
Rather than iterating over the data points, you can condense the computation into a matrix operation, meaning you only have to iterate over the K centers.
# Simulate the inputs: n points and K centers in d dimensions, one per column.
n <- 3823
K <- 10
d <- 64
x <- matrix(rnorm(n * d), ncol = n)
centers <- matrix(rnorm(K * d), ncol = K)
# Iterate over the K centers only: each pass subtracts one center from every
# column of x and sums the squared differences column-wise, so `dists` is an
# n-by-K matrix of squared Euclidean distances (no square root is taken).
system.time(
  dists <- vapply(
    seq_len(K),
    function(k) colSums((x - centers[, k])^2),
    numeric(n)
  )
)
Runs in:
utilisateur système écoulé
0.100 0.008 0.108
on my laptop.
Calculate distance from one point to the others by R
See ?distm: you can pass it two sets of points:
# Row 6 is the reference point; every other row is compared against it.
# Columns are passed in 2:1 order (presumably to give lon, lat; verify
# against the layout of `coordinaties`).
distm(x = coordinaties[6, 2:1], y = coordinaties[-6, 2:1])
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
[1,] 11075.61 11075.61 0 0 0 0 0 0 0 10183.02 10183.02 10183.02
[,13] [,14] [,15] [,16] [,17] [,18]
[1,] 10183.02 10183.02 0 0 0 0
Calculate minimum distance between groups of points in data frame
One way in base R, using combn:
# For every pair of groups: join their rows on Time, then report the largest
# absolute gap in Value together with the Time at which it occurs.
do.call(rbind, lapply(
  combn(unique(df$Group), 2, simplify = FALSE),
  function(pair) {
    first <- subset(df, Group == pair[1])
    second <- subset(df, Group == pair[2])
    joined <- merge(first, second, by = "Time")
    gap <- abs(joined$Value.x - joined$Value.y)
    data.frame(
      combn = paste(pair, collapse = ""),
      time = joined$Time[which.max(gap)],
      max_difference = max(gap)
    )
  }
))
# combn time max_difference
#1 AB 1 4
#2 AC 0 8
#3 BC 0 5
We create all combinations of the unique Group values, subset the data for each pair of groups, and merge the two subsets on Time. We then subtract the corresponding value columns and return the maximum absolute difference between them, together with the Time at which it occurs.
data
# Example input: three groups (A, B, C), each with a Value recorded per Time.
df <- data.frame(
  Time = c(0L, 1L, 2L, 0L, 1L, 2L, 0L, 0L, 0L),
  Value = c(1, 2, 3, 4, 6, 6, 7, 7, 9),
  Group = rep(c("A", "B", "C"), each = 3),
  stringsAsFactors = FALSE
)
R: Calculate distance between consecutive points per group and group them
I worked out a small use case that can get you started. It is a base R approach that uses a for loop to label the subgroups and then aggregates them: you pass a vector of column names together with a paired vector of function names, one aggregation function per column.
# Example input. The first field of each data row is used as the row name
# (the header has one field fewer than the rows). `Distance` is NA on the
# first row of each `Group`.
df <- read.table(text = "
Group X Y Z Distance
1 110 3762 431 10 NA
2 112 4950 880 10 NA
3 113 5062 873 20 NA
4 113 5225 874 30 163.00307
5 113 5262 875 10 37.01351
6 113 5300 874 20 38.01316
7 114 5300 874 30 NA
8 114 5300 874 20 38.01316", header = TRUE, stringsAsFactors = FALSE)
#' Label runs of consecutive rows and aggregate columns per run
#'
#' Walks `df` in row order and gives every row a `subgroup` id. Row `i + 1`
#' starts a new subgroup when its `Group` differs from row `i`, or when the
#' groups match but `Distance[i + 1]` is greater than 100. Every row also gets
#' a `grouped` flag: "yes" when the following row stays in the same subgroup,
#' "no" otherwise. The last row has no successor and repeats the flag of the
#' row before it; a single-row input is its own subgroup and is flagged "no".
#'
#' @param df Data frame with columns `Group` and `Distance` plus the columns
#'   named in `colsToAgg`. The default, `data`, is looked up by name when the
#'   function runs, so pass `df` explicitly.
#' @param returnRaw If `TRUE`, return `df` with the `subgroup` and `grouped`
#'   columns added and skip the aggregation. In that raw output only the
#'   `grouped` value of the first row of each subgroup is used downstream.
#' @param colsToAgg Character vector of columns to aggregate.
#' @param how Character vector of aggregation function names, paired
#'   position-by-position with `colsToAgg` (by default `Z1` by sum, `Z2` by
#'   sum and `Z3` by max).
#' @return A data frame with one row per subgroup holding `Group`, the
#'   aggregated columns and the `grouped` flag of the subgroup's first row;
#'   or the labelled input when `returnRaw` is `TRUE`.
aggregateIt <- function(df = data,
                        returnRaw = FALSE,
                        colsToAgg = c("Z1", "Z2", "Z3"),
                        how = c("sum", "sum", "max")) {
  if (!is.data.frame(df) || nrow(df) == 0L) {
    stop("`df` must be a data frame with at least one row", call. = FALSE)
  }

  n_rows <- nrow(df)
  subgroup_id <- integer(n_rows)
  grouped_flag <- character(n_rows)
  count <- 1L
  for (i in seq_len(n_rows - 1L)) {
    # Distance is only consulted when the group is unchanged, so an NA
    # distance on the first row of a group is never evaluated.
    starts_new <- if (df$Group[i] != df$Group[i + 1L]) {
      TRUE
    } else {
      df$Distance[i + 1L] > 100L
    }
    subgroup_id[i] <- count
    if (starts_new) {
      grouped_flag[i] <- "no"
      count <- count + 1L
    } else {
      grouped_flag[i] <- "yes"
    }
  }
  # The last row is never visited as `i`: it takes the current counter and
  # inherits the flag of its predecessor (or "no" when it is the only row).
  subgroup_id[n_rows] <- count
  grouped_flag[n_rows] <- if (n_rows > 1L) grouped_flag[n_rows - 1L] else "no"

  df <- within(df, {
    subgroup <- subgroup_id
    grouped <- grouped_flag
  })
  if (returnRaw) {
    return(df)
  }

  if (length(colsToAgg) != length(how)) {
    stop("`colsToAgg` and `how` must have the same length", call. = FALSE)
  }

  # Aggregate each column with its own function, then join the per-column
  # results back together on the subgroup id.
  per_column <- lapply(seq_along(how), function(k) {
    aggregate(. ~ subgroup, df[, c(colsToAgg[k], "subgroup")], how[k])
  })
  aggregated <- Reduce(function(a, b) merge(a, b, by = "subgroup"), per_column)

  # The first row of every subgroup supplies its Group and grouped flag.
  first_rows <- df[
    !duplicated(df$subgroup, fromLast = FALSE),
    c("Group", "subgroup", "grouped")
  ]
  out <- merge(aggregated, first_rows, by = "subgroup")
  out[, c("Group", colsToAgg, "grouped")]
}
aggregateIt(df = df, colsToAgg = "Z", how = "sum")
# Group Z grouped
#1 110 10 no
#2 112 10 no
#3 113 20 no
#4 113 60 yes
#5 114 50 yes
I am not claiming this is the most efficient solution, but it shows one way to do it. Hope this helps!
Calculate distance of one point in DF with all other points in R
If your points exist in 2D space (e.g. Euclidean), then you can use the cluster package:
library(cluster)
# Load the `agriculture` example data set into the workspace.
data(agriculture)
## Dissimilarities using Euclidean metric
d.agr <- daisy(agriculture, metric = "euclidean")
# Expand the dissimilarity object into a full square matrix for inspection.
as.matrix(d.agr)
The final matrix will give you the "distance" between each point, according to the metric you set (Euclidean in the above example).
Calculate the distances between pairs of points in r
rbind(x, y) has 2 rows and 10 columns and is interpreted as 2 points in 10-dimensional space. dist(rbind(x, y)) calculates the Euclidean distance between these 2 points.
Calculating the distance between two long/lat points in the same data.frame
This is easily solved with the distGeo function (similar to your functions above) from the geosphere package:
library(geosphere)
# Row-wise distance, in meters, between the (lon1, lat1) and (lon2, lat2)
# points stored in the same data frame.
df$distance <- distGeo(df[, c("lon1", "lat1")], df[, c("lon2", "lat2")])
# Drop columns 3 to 6 from the printed result.
df[, -(3:6)]
customer_id id distance
1 353808874 8474 498.2442
2 69516747 8107 668.4088
3 357032052 1617436 366.9541
4 307735090 7698 531.0785
5 307767260 1617491 343.3051
Distance between a matrix of points, simple if & for's
You could use the dist function:
# Put the coordinates in a two-column table; dist() then returns the pairwise
# distances between its rows.
df <- data.frame(easting = easting, northing = northing)
dist(df) # or round(dist(df, upper = TRUE, diag = TRUE), 3)
example for the first three rows:
# Print the full symmetric matrix (upper triangle and diagonal included),
# rounded to 3 decimals. TRUE is spelled out because T can be reassigned.
round(dist(df[1:3, ], upper = TRUE, diag = TRUE), 3)
1 2 3
1 0.000 310.409 581.588
2 310.409 0.000 271.221
3 581.588 271.221 0.000
Comparison:
# Default printing: lower triangle only, rounded to 3 decimals.
round(dist(df[1:3, ]), digits = 3)
1 2
2 310.409
3 581.588 271.221
Calculating distance between coordinates and reference point
points_in_circle() returns the points within a given radius of a reference point. The following returns all points within 1000 km of the reference point:
library(spatialrisk)
# Keep the rows of df that lie within `radius` of the reference point.
# radius = 1e6 corresponds to 1000 km if the unit is meters (the output
# column is named distance_m).
points_in_circle(
  df,
  lon_center = 5.04127,
  lat_center = 52.92343,
  lon = Longitude,
  lat = Latitude,
  radius = 1e6
)
#> Day Month Year Location.Receiver Transmitter
#> 1095729 26 07 2021 Den Oever Ijsselmeer A69-1602-59776
#> 1072657 17 08 2021 Den Oever Ijsselmeer A69-1602-59776
#> 1092667 18 08 2021 Den Oever Ijsselmeer A69-1602-59776
#> 716601 19 08 2021 Den Oever Ijsselmeer A69-1602-59769
#> 1077415 19 08 2021 Den Oever Ijsselmeer A69-1602-59776
#> 1180267 05 08 2021 Medemblik Ijsselmeer, gemaal A69-1602-59777
#> Batch.location BatchNr Latitude Longitude Date distance_m
#> 1095729 Den Oever 8 52.92343 5.04127 2021-07-26 0.00
#> 1072657 Den Oever 8 52.92343 5.04127 2021-08-17 0.00
#> 1092667 Den Oever 8 52.92343 5.04127 2021-08-18 0.00
#> 716601 Den Oever 1 52.92343 5.04127 2021-08-19 0.00
#> 1077415 Den Oever 8 52.92343 5.04127 2021-08-19 0.00
#> 1180267 Den Oever 9 52.76098 5.12172 2021-08-05 18875.55
Created on 2021-12-02 by the reprex package (v2.0.1)
Related Topics
How to Create Datatable with Complex Header in R Shiny
Visualise Distances Between Texts
How to Show the Progress of Code in Parallel Computation in R
Multiple Graphs Over Multiple Pages Using Ggplot
Difference Between Read.Csv() and Read.Csv2() in R
Remove Parenthesis from a Character String
Extract Non Null Elements from a List in R
Modify Variable Within R Function
Sum of Antidiagonal of a Matrix
Generate Matrix with Iid Normal Random Variables Using R
Using Grep in R to Delete Rows from a Data.Frame
R:Ggplot2:Facet_Grid:How Include Math Expressions in Few (Not All) Labels
Centering Image and Text in R Markdown for a PDF Report
Why Is Foreach() %Do% Sometimes Slower Than For
How to Change the Na Color from Gray to White in a Ggplot Choropleth Map
Filter One Selectinput Based on Selection from Another Selectinput