Sum amount last 6 month prior to the date of transaction
This is simply a non-equi join in data.table. You can create a variable of date - 180
and limit the join between the current date and that variable. This should be fairly quick
library(data.table)
# Convert `dt` to a data.table by reference and add the lower bound of the
# look-back window: 180 days before each row's `date`.
setDT(dt)[, date_minus_180 := date - 180]
# Non-equi self-join. `dt` is the `i` table; for each of its rows, rows of
# `.SD` are matched when their `to` equals that row's `from` and their `date`
# lies in [date_minus_180, date] of that row (both bounds inclusive).
# `by = .EACHI` evaluates sum(amount) once per row of `i`; the unnamed result
# column is V1, which is assigned by reference as `amnt_6_m`.
# NOTE(review): because `date <= date` is inclusive, same-day rows are counted;
# confirm that matches the intended meaning of "prior to the transaction".
dt[, amnt_6_m := .SD[dt, sum(amount, na.rm = TRUE),
on = .(to = from, date <= date, date >= date_minus_180), by = .EACHI]$V1]
# Show the first 10 rows, including the two new columns.
head(dt, 10)
# id from to date amount date_minus_180 amnt_6_m
# 1: 18529 5370 9356 2005-05-31 24.4 2004-12-02 0.0
# 2: 13742 5370 5605 2005-08-05 7618.0 2005-02-06 0.0
# 3: 9913 5370 8567 2005-09-12 21971.0 2005-03-16 0.0
# 4: 956 8605 5370 2005-10-05 5245.0 2005-04-08 0.0
# 5: 2557 5370 5636 2005-11-12 2921.0 2005-05-16 5245.0
# 6: 1602 6390 5370 2005-11-26 8000.0 2005-05-30 0.0
# 7: 18669 5370 8933 2005-11-30 169.2 2005-06-03 13245.0
# 8: 35900 5370 8483 2006-01-31 71.5 2005-08-04 13245.0
# 9: 48667 8934 5370 2006-03-31 14.6 2005-10-02 0.0
# 10: 51341 5370 7626 2006-04-11 4214.0 2005-10-13 8014.6
Calculate last 12 Months + Sum of all transaction before last 12 months
-- cte: number each account's rows from newest to oldest by CreatedDate.
with cte as (
select
row_number() over(partition by AccountNo order by CreatedDate desc) as rn,
AccountNo, Amount, CreatedDate
from TransactionsTest
), cte2 as (
-- Ord = 0: the 12 most recent rows of each account, kept unchanged.
-- NOTE(review): this is 12 rows, which equals 12 months only if there is
-- exactly one row per account per month -- confirm against the data.
select
c.AccountNo, c.CreatedDate, c.Amount, 0 as Ord
from cte as c
where rn <= 12
union all
-- Ord = 1: every older row, collapsed into one summed row per account with a
-- null CreatedDate.
select
c.AccountNo, null as CreatedDate, sum(c.Amount) as Amount, 1 as Ord
from cte as c
where rn > 12
group by c.AccountNo
)
-- Within each account, the detail rows (Ord 0) sort before the summary row
-- (Ord 1).
select
AccountNo, CreatedDate, Amount
from cte2
order by AccountNo, Ord, CreatedDate
sql fiddle demo
How can I keep track of total transaction amount sent from an account each last 6 month?
One way to do this with dplyr could be:
library(dplyr)
# Within each sender (`from`), add up the amounts whose date lies in the
# 180 days ending on each transaction's date (both ends inclusive, so the
# transaction itself is counted).
df %>%
  group_by(from) %>%
  mutate(
    total_trx = purrr::map_dbl(
      date,
      function(day) {
        in_window <- between(date, day - 180, day)
        sum(amount[in_window])
      }
    )
  )
# id from to date amount total_trx
# <int> <int> <int> <date> <dbl> <dbl>
# 1 18529 5370 9356 2005-05-31 24.4 24.4
# 2 13742 5370 5605 2005-08-05 7618 7642.
# 3 9913 5370 8567 2005-09-12 21971 29613.
# 4 2557 5370 5636 2005-11-12 2921 32534.
# 5 18669 5370 8933 2005-11-30 169. 32679.
# 6 35900 5370 8483 2006-01-31 71.5 32751.
# 7 51341 5370 7626 2006-04-11 4214 7376.
# 8 83324 5370 9676 2006-08-31 261. 4475.
# 9 100277 5370 9105 2006-10-31 182 443.
#10 103444 5370 9772 2006-11-08 16927 17370.
If your data is huge, you can use the same approach in data.table,
which might be more efficient.
library(data.table)
# Within each sender (`from`), total the amounts whose date lies in the 180
# days ending on each transaction's date (both ends inclusive). `:=` adds the
# `total_trx` column to `df` by reference.
# vapply() is used instead of sapply() so the result is always exactly one
# number per row, whatever the input looks like.
setDT(df)[, total_trx := vapply(
  date,
  function(x) sum(amount[between(date, x - 180, x)]),
  numeric(1)
), by = from]
Sum and Count by month, shown with last day of that month
You may try with the following statement:
-- Total amount and transaction count per calendar month, branch and channelID.
-- Rows are grouped by the first day of the month (DATEFROMPARTS); EOMONTH
-- turns that into the month's last day, which is reported as YearT.
SELECT
EOMONTH(DATEFROMPARTS(YEAR(Trandate), MONTH(Trandate), 1)) AS YearT,
branch, channelID,
SUM(amount) AS TAmount,
COUNT(*) AS TranT
-- Inline sample rows; the column list after `v` names the four values of each
-- row in order: Trandate, channelID, branch, amount.
FROM (VALUES
('20190501', 1, 2, 2000),
('20190511', 1, 2, 2200),
('20200309', 1, 2, 5600),
('20200315', 1, 2, 600),
('20191012', 2, 10, 12000),
('20191012', 2, 10, 12000),
('20191115', 4, 7, 4400),
('20200215', 4, 2, 2500)
) v (Trandate, channelID, branch, amount)
GROUP BY DATEFROMPARTS(YEAR(Trandate), MONTH(Trandate), 1), branch, channelID
ORDER BY DATEFROMPARTS(YEAR(Trandate), MONTH(Trandate), 1)
Result:
YearT branch channelID TAmount TranT
2019-05-31 2 1 4200 2
2019-10-31 10 2 24000 2
2019-11-30 7 4 4400 1
2020-02-29 2 4 2500 1
2020-03-31 2 1 6200 2
Calculating the balance from the previous amount based on DATE?
So, with a CTE for the data, you can use the window-function version of SUM to get the result you want.
-- Running balance per number_id: cumulative sum of `value` in date order.
-- The CTE supplies inline sample rows and parses the third column as a date.
WITH data(number_id, value, date) AS (
SELECT column1, column2, to_date(column3, 'YYYY-MM-DD') FROM VALUES
(1, 10, '2022-01-01'),
(1, 20, '2022-01-02'),
(1, 30, '2022-01-04'),
(1, 40, '2022-01-07'),
(2, 110, '2022-01-01'),
(2, 120, '2022-01-02'),
(2, 130, '2022-01-04'),
(2, 140, '2022-01-07')
)
SELECT number_id
,value
-- Frame runs from the first row of the partition up to the current row.
,sum(value)over(partition by number_id order by date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) as balance
-- The leading comma was missing here, which made the statement invalid; `date`
-- is the fourth output column that ORDER BY 1, 4 refers to.
,date
FROM data
ORDER BY 1, 4;
gives:
NUMBER_ID | VALUE | BALANCE | DATE |
---|---|---|---|
1 | 10 | 10 | 2022-01-01 |
1 | 20 | 30 | 2022-01-02 |
1 | 30 | 60 | 2022-01-04 |
1 | 40 | 100 | 2022-01-07 |
2 | 110 | 110 | 2022-01-01 |
2 | 120 | 230 | 2022-01-02 |
2 | 130 | 360 | 2022-01-04 |
2 | 140 | 500 | 2022-01-07 |
How can I keep track of total transaction amount received by an account each last 6 month?
We can use `map2_dbl` and take the sum of the `amount` values that lie in the 6-month range.
library(dplyr)
library(purrr)
# For each transaction, total every amount whose recipient (`to`) equals this
# row's `from` and whose date lies in the 180 days ending on this row's date
# (both ends inclusive).
data %>%
  mutate(
    amt = map2_dbl(
      from, date,
      function(account, day) {
        in_window <- between(date, day - 180, day)
        sum(amount[to == account & in_window])
      }
    )
  )
calculating amount total and frequency of transactions before a certain date column
IIUC, you can assign a conditional column based on the difference of subscribe date and trx date and then group by:
Convert date columns to datetime from strings (Ignore this block if already date)
# Parse both columns with pd.to_datetime, overwriting the originals in `ad`.
ad['subscribe_date'] = pd.to_datetime(ad['subscribe_date'])
ad['trx_month'] = pd.to_datetime(ad['trx_month'])
Then use:
# Maps the aggregate names produced by .agg() to the output column names.
d = {'count':'frequency','sum':'monetary'}
# Whole days from trx_month to subscribe_date (subscribe_date - trx_month):
# negative -> the transaction is after the subscription date,
# positive -> before it, zero -> on the subscription date itself.
diff_ = ad['subscribe_date'].sub(ad['trx_month']).dt.days
# Label each row via np.select ("Subscribed_date" is the default for diff_ == 0),
# then count and sum trx_amount per (customer, label) and rename the columns.
out = (ad.assign(Before_After=
np.select([diff_<0,diff_>0],["After","Before"],"Subscribed_date"))
.groupby(['customer','Before_After'])['trx_amount'].agg(['count','sum'])
.rename(columns=d))
print(out)
frequency monetary
customer Before_After
Clark After 2 120
Before 2 80
Fay After 2 130
Stones After 1 45
Before 1 200
Subscribed_date 1 90
EDIT: Per your edit, you can create a dictionary with `Before` and `After` as keys and the respective dataframes as values:
# Maps the aggregate names produced by .agg() to the output column names.
d = {'count':'frequency','sum':'monetary'}
# Whole days from trx_month to subscribe_date (subscribe_date - trx_month):
# negative -> "After", positive -> "Before", zero -> "Subscribed_date".
diff_ = ad['subscribe_date'].sub(ad['trx_month']).dt.days
# Same per-(customer, label) count/sum as a plain groupby summary, then
# .unstack() moves the Before_After labels into the columns and
# .swaplevel(axis=1) makes that label the outer column level.
out = (ad.assign(Before_After=
np.select([diff_<0,diff_>0],["After","Before"],"Subscribed_date"))
.groupby(['customer','Before_After'])['trx_amount'].agg(['count','sum'])
.rename(columns=d)).unstack().swaplevel(axis=1)
# One sub-frame per outer column label, keyed by that label.
final_dict = {i: out.loc[:,i] for i in out.columns.levels[0]}
print(final_dict['Before'],'\n\n',final_dict["After"])
frequency monetary
customer
Clark 2.0 80.0
Fay NaN NaN
Stones 1.0 200.0
frequency monetary
customer
Clark 2.0 120.0
Fay 2.0 130.0
Stones 1.0 45.0
Related Topics
How to Do Str_Extract with Base R
Paste Several Column Values into One Value in R
Dplyr: Grouping and Summarizing/Mutating Data with Rolling Time Windows
Saving a File to Sharepoint with R
Inline R Code in Yaml for Rmarkdown Doesn't Run
Does R-Server or Shiny Server Create a New R Process/Instance for Each User
Combine Result from Top_N with an "Other" Category in Dplyr
How to Color the Ocean Blue in a Map of the Us
Rotate X Axis Labels 45 Degrees on Grouped Bar Plot R
Print the Sourced R File to an Appendix Using Sweave
How to Change Factor Labels into String in a Data Frame
Rscript Detect If R Script Is Being Called/Sourced from Another Script
How to Show a Loading Screen When the Output Is Being Calculated in a Background Process
How to Configure Box.Color in Directlabels "Draw.Rects"
How Many Elements in a Vector Are Greater Than X Without Using a Loop