MySQL Group_Concat Duplicates

mysql GROUP_CONCAT duplicates

You need to use the DISTINCT option:

GROUP_CONCAT(DISTINCT animal)

How to suppress duplicates in GROUP_CONCAT MySQL

Your goal is a bit different than that of your linked questions. You only want to remove the duplicate surname, if two persons with the same address have got the same surname.

You can get this with a two step process:

  • First we concatenate the titles and both forenames, grouped by address and surname
  • In the outer query we concat the first part and the surnames by groups of addresses only
  • Addition: we replace the double blanks for persons without a second forename too.

You can use this query:

-- Build one salutation per address in which a surname shared by several
-- residents appears only once.
--   * Derived table t: one row per (address_id, surname); `name` joins the
--     title and forenames of every person in that group with ' & '.
--   * Outer query: appends the surname to each group and joins the groups of
--     one address with ' & '.
-- NOTE(review): the column names forname_1 / forename_2 are kept exactly as
-- given; verify the spelling against the actual `people` schema.
SELECT
    -- Collapse double blanks into one. CONCAT_WS skips NULL arguments but not
    -- empty strings, so an empty second forename leaves two adjacent blanks.
    -- The search string must be TWO spaces; replacing ' ' with ' ' is a no-op.
    REPLACE(
        GROUP_CONCAT(
            CONCAT_WS(' ', t.name, t.surname)
            SEPARATOR ' & '
        ),
        '  ',
        ' '
    ) AS Salutation,
    addresses.the_address
FROM (
    SELECT
        GROUP_CONCAT(
            CONCAT_WS(' ', title, forname_1, forename_2)
            SEPARATOR ' & '
        ) AS name,
        surname,
        address_id
    FROM
        people
    GROUP BY
        address_id, surname
) t
INNER JOIN
    addresses
ON
    addresses.address_id = t.address_id
GROUP BY
    t.address_id;

See it working in this demo
This solution takes into account that a third person with a different surname can live at the same address, i.e. a mother-in-law.

Explanation

The inner query

-- One row per (address_id, surname): `name` holds the title and forenames of
-- every person in the group, joined with ' & '. The surname stays a separate
-- column so that the outer query can append it once per group.
SELECT
    GROUP_CONCAT(
        CONCAT_WS(' ', title, forname_1, forename_2)
        SEPARATOR ' & '
    ) AS name,
    surname,
    address_id
FROM people
GROUP BY address_id, surname

concats only the title and the given names of the persons living at the same address and having the same surname (see second query in the demo).

Then we do the same group_concat as you have done before and use in the last step the REPLACE function to eliminate double blanks.

Group_Concat with duplicates

Just join the table with itself, using invoice_number as key.

-- For every invoice item, list the ids of all items that share its invoice
-- number. The table is joined to itself on invoice_number, so every column
-- must be qualified: an unqualified `amount` or `currency` exists on both
-- sides of the join and MySQL rejects it as ambiguous.
SELECT
    invoices.item_id,
    invoices.invoice_number,
    GROUP_CONCAT(related.item_id) AS shared_item_ids,
    -- NOTE(review): averaged over all rows sharing the invoice number; switch
    -- to invoices.amount if the item's own amount is wanted. The division by
    -- 100 presumably converts from hundredths (e.g. cents) -- confirm.
    ROUND(AVG(related.amount) / 100, 2) AS invoice_amount,
    invoices.currency
FROM invoices
INNER JOIN invoices AS related
    ON related.invoice_number = invoices.invoice_number
GROUP BY invoices.item_id

MySQL - GROUP_CONCAT returns duplicate data, can't use DISTINCT

You can resolve this by extracting the tag grouping to its own subquery:

-- One row per recipe of user 1. Ingredient data is aggregated over the joined
-- rows; tags are aggregated in a correlated scalar subquery instead of one
-- more join, so tag rows cannot multiply the ingredient rows.
SELECT
    recipe.*,
    GROUP_CONCAT(recipe_detail.ingredient_id) AS iid,
    GROUP_CONCAT(ingredient.name)             AS iname,
    GROUP_CONCAT(ingredient_mfr.abbr)         AS mabbr,
    (
        -- all tag names mapped to the current recipe
        SELECT GROUP_CONCAT(recipe_tag.name)
        FROM recipe_tag
        INNER JOIN recipe_tagmap
            ON recipe_tagmap.tag_id = recipe_tag.id
        WHERE recipe_tagmap.recipe_id = recipe.id
    ) AS tag
FROM recipe
LEFT JOIN recipe_detail
    ON recipe.id = recipe_detail.recipe_id
LEFT JOIN ingredient
    ON recipe_detail.ingredient_id = ingredient.id
LEFT JOIN ingredient_mfr
    ON ingredient.mfr_id = ingredient_mfr.id
WHERE recipe.user_id = 1
GROUP BY recipe.id

(example fiddle)

MySQL - GROUP_CONCAT eliminate duplicates when joining multiple tables

Instead of joining three tables a good solution is to use scalar subqueries. For example:

-- One row per category. Each list is built by its own correlated scalar
-- subquery, so prices and videos are never joined to each other and cannot
-- duplicate one another's values.
SELECT
    *,
    (
        -- price ids, ordered by price
        SELECT GROUP_CONCAT(p.id ORDER BY p.price)
        FROM prices p
        WHERE p.category_id = c.id
    ) AS PriceId,
    (
        -- prices, in the same order as PriceId
        SELECT GROUP_CONCAT(p.price ORDER BY p.price)
        FROM prices p
        WHERE p.category_id = c.id
    ) AS PricePrice,
    (
        -- video ids, ordered by id
        SELECT GROUP_CONCAT(v.id ORDER BY v.id)
        FROM videos v
        WHERE v.category_id = c.id
    ) AS VideoId,
    (
        -- video uuids, in the same order as VideoId
        SELECT GROUP_CONCAT(v.uuid ORDER BY v.id)
        FROM videos v
        WHERE v.category_id = c.id
    ) AS VideoUUID
FROM categories c
GROUP BY id

Result:

id  token                PriceId     PricePrice           VideoId      VideoUUID                           
--- -------------------- ----------- -------------------- ------------ -----------------------------------
1 Wyatt Reinger (ZW) 2,1,3 2.51,2.61,4.45 1,2,3,4 3a817d01,3222679e,63cdc038,e8d8edf4
2 Donna Cronin (BL) 4 4.76 5 93f8a404
3 Ally Kertzmann (GY) 5,6 1.83,1.84 6,7,8 6f2459a7,463127ab,4bf357ba
4 Talia Torp (AF) 7,8 2.61,3.32 9,10,11,12 0cedbd0a,8b21afd7,ea616692,ed2b10d7
5 Delphine Lakin (TL) 11,12,9,10 1.65,3.27,3.27,3.36 13,14,15,16 6217a488,7f52a97a,de11ba64,b49b6ddc

See running example at SQL Fiddle.

The problem of joining three tables is that it produces many duplicate values that complicate the aggregation.

MySQL Trigger Nested Group_Concat Duplicates

Change the second Select to

-- Table1: three rows with every selection/result column still NULL.

CREATE TABLE Table1 (
    id INT,
    selection1 VARCHAR(50),
    selection2 VARCHAR(50),
    results VARCHAR(1000)
);

INSERT INTO Table1 (id, selection1, selection2, results)
VALUES
    (1, NULL, NULL, NULL),
    (2, NULL, NULL, NULL),
    (3, NULL, NULL, NULL);

SELECT * FROM Table1;





id | selection1 | selection2 | results
-: | :--------- | :--------- | :------
1 | null | null | null
2 | null | null | null
3 | null | null | null
-- Table2: pairs of stock symbols; the same symbol may occur in either column.

CREATE TABLE Table2 (
    stocks1 VARCHAR(50),
    stocks2 VARCHAR(50)
);

INSERT INTO Table2 (stocks1, stocks2)
VALUES
    -- alternative sample rows, currently disabled:
    -- ('BRK','BRK'),
    -- ('BRK','BRK');
    ('BRK', 'GOOG'),
    ('INTC', 'NKE'),
    ('TSLA', 'APPL'),
    ('APPL', 'NKE'),
    ('TSLA', 'FB'),
    ('NKE', 'BRK');

SELECT * FROM Table2;





stocks1 | stocks2
:------ | :------
BRK | GOOG
INTC | NKE
TSLA | APPL
APPL | NKE
TSLA | FB
NKE | BRK
-- Before each update of a Table1 row, rebuild NEW.results as a '<br>'-prefixed,
-- '<br>'-separated list of the values found in Table2.stocks1 and
-- Table2.stocks2. UNION (not UNION ALL) removes duplicate values before they
-- are concatenated. Both branches are filtered on NEW.selection2 = 'x'; for
-- any other value the derived table is empty, GROUP_CONCAT yields NULL and so
-- does the surrounding CONCAT.
CREATE TRIGGER trigger1
BEFORE UPDATE ON Table1
FOR EACH ROW
BEGIN
    SET NEW.results = (
        SELECT CONCAT('<br>', GROUP_CONCAT(val SEPARATOR '<br>'))
        FROM (
            SELECT stocks1 AS val
            FROM Table2
            WHERE NEW.selection2 = 'x'
            UNION
            SELECT stocks2
            FROM Table2
            WHERE NEW.selection2 = 'x'
        ) distinct_stocks
    );
END;
-- Update row 1 (this fires the BEFORE UPDATE trigger), then show the table.
-- Each statement carries its own terminator; without it the UPDATE would run
-- straight into the SELECT and fail to parse.
UPDATE Table1
SET selection1 = 'x', selection2 = 'x'
WHERE id = 1;

SELECT * FROM Table1;

id | selection1 | selection2 | results
-: | :--------- | :--------- | :---------------------------------------------------
1 | x | x | <br>BRK<br>INTC<br>TSLA<br>APPL<br>NKE<br>GOOG<br>FB
2 | null | null | null
3 | null | null | null

db<>fiddle here

Strange duplicate behavior from GROUP_CONCAT of two LEFT JOINs of GROUP_BYs

Your second query is of the form:

-- Schematic shape of the second query ("..." stands for elided SQL).
q1 -- PK user_id
LEFT JOIN (...
GROUP BY user_id, t.tag
) AS q2 -- (user_id, tag) is a key/UNIQUE of q2
ON q2.user_id = q1.user_id
LEFT JOIN (...
GROUP BY user_id, c.category
) AS q3 -- (user_id, category) is a key/UNIQUE of q3
ON q3.user_id = q1.user_id
GROUP BY -- group_concats

The inner GROUP BYs result in (user_id, t.tag) & (user_id, c.category) being keys/UNIQUEs. Other than that I won't address those GROUP BYs.

TL;DR When you join (q1 JOIN q2) to q3 it is not on a key/UNIQUE of one of them so for each user_id you get a row for every possible combination of tag & category. So the final GROUP BY inputs duplicates per (user_id, tag) & per (user_id, category) and inappropriately GROUP_CONCATs duplicate tags & categories per user_id. Correct would be (q1 JOIN q2 GROUP BY) JOIN (q1 JOIN q3 GROUP BY) in which all joins are on common key/UNIQUE (user_id) & there is no spurious aggregation. Although sometimes you can undo such spurious aggregation.

A correct symmetrical INNER JOIN approach: LEFT JOIN q1 & q2--1:many--then GROUP BY & GROUP_CONCAT (which is what your first query did); then separately similarly LEFT JOIN q1 & q3--1:many--then GROUP BY & GROUP_CONCAT; then INNER JOIN the two results ON user_id--1:1.

A correct symmetrical scalar subquery approach: SELECT the GROUP_CONCATs from q1 as scalar subqueries each with a GROUP BY.

A correct cumulative LEFT JOIN approach: LEFT JOIN q1 & q2--1:many--then GROUP BY & GROUP_CONCAT; then LEFT JOIN that & q3--1:many--then GROUP BY & GROUP_CONCAT.

A correct approach like your 2nd query: You first LEFT JOIN q1 & q2--1:many. Then you LEFT JOIN that & q3--many:1:many. It gives a row for every possible combination of a tag & a category that appear with a user_id. Then after you GROUP BY you GROUP_CONCAT--over duplicate (user_id, tag) pairs and duplicate (user_id, category) pairs. That is why you have duplicate list elements. But adding DISTINCT to GROUP_CONCAT gives a correct result. (Per wchiquito's comment.)

Which you prefer is, as usual, an engineering tradeoff to be informed by query plans & timings, per actual data/usage/statistics (input & stats for the expected amount of duplication, timing of actual queries, etc.). One issue is whether the extra rows of the many:1:many JOIN approach offset its saving of a GROUP BY.

-- cumulative LEFT JOIN approach
-- Step 1 (inner derived table, aliased q1): per-user totals LEFT JOINed to the
-- per-(user, tag) sums and grouped back to one row per user with top_two_tags.
-- Step 2 (outer query): that result LEFT JOINed to the per-(user, category)
-- sums and grouped again, so tags and categories are never cross-multiplied.
SELECT
q1.user_id, q1.user_name, q1.score, q1.reputation,
top_two_tags,
-- categories ordered by summed reputation (highest first); substring_index
-- keeps the text before the 2nd comma, i.e. at most the first two entries
substring_index(group_concat(q3.category ORDER BY q3.category_reputation DESC SEPARATOR ','), ',', 2) AS category
FROM
-- your 1st query (less ORDER BY) AS q1
(SELECT
q1.user_id, q1.user_name, q1.score, q1.reputation,
-- same "first two entries" technique as above, applied to tags
substring_index(group_concat(q2.tag ORDER BY q2.tag_reputation DESC SEPARATOR ','), ',', 2) AS top_two_tags
FROM
-- innermost q1: one row per user with summed score/reputation (0 when the
-- user has no matching reputations row)
(SELECT
u.id AS user_Id,
u.user_name,
coalesce(sum(r.score), 0) as score,
coalesce(sum(r.reputation), 0) as reputation
FROM
users u
LEFT JOIN reputations r
ON r.user_id = u.id
-- the date filter sits in ON, not WHERE, so users without recent rows are kept
AND r.date_time > 1500584821 /* unix_timestamp(DATE_SUB(now(), INTERVAL 1 WEEK)) */
GROUP BY
u.id, u.user_name
) AS q1
LEFT JOIN
(
-- q2: summed reputation per (user_id, tag)
SELECT
r.user_id AS user_id, t.tag, sum(r.reputation) AS tag_reputation
FROM
reputations r
JOIN post_tag pt ON pt.post_id = r.post_id
JOIN tags t ON t.id = pt.tag_id
WHERE
r.date_time > 1500584821 /* unix_timestamp(DATE_SUB(now(), INTERVAL 1 WEEK)) */
GROUP BY
user_id, t.tag
) AS q2
ON q2.user_id = q1.user_id
GROUP BY
q1.user_id, q1.user_name, q1.score, q1.reputation
-- the alias q1 is reused here for the grouped result of step 1
) AS q1
-- finish like your 2nd query
LEFT JOIN
(
-- q3: summed reputation per (user_id, category)
SELECT
r.user_id AS user_id, c.category, sum(r.reputation) AS category_reputation
FROM
reputations r
JOIN post_category ct ON ct.post_id = r.post_id
JOIN categories c ON c.id = ct.category_id
WHERE
r.date_time > 1500584821 /* unix_timestamp(DATE_SUB(now(), INTERVAL 1 WEEK)) */
GROUP BY
user_id, c.category
) AS q3
ON q3.user_id = q1.user_id
GROUP BY
q1.user_id, q1.user_name, q1.score, q1.reputation
ORDER BY
q1.reputation DESC, q1.score DESC ;

Eliminate duplicate rows inside GROUP_CONCAT

Since you're only getting the information for a single user, I would go with Salman A's answer; but if you were going for multiple (or all) users, and users tend to have lots of emails and numbers, this version could be faster.

-- Employees with their e-mail addresses and phone numbers. Each list is
-- aggregated in its own derived table (one row per foreign key) before it is
-- joined, so e-mail rows and number rows cannot multiply each other.
-- String literals use single quotes: double-quoted text is parsed as an
-- identifier when the ANSI_QUOTES SQL mode is enabled.
SELECT
    emp.id,
    first_name,
    second_name,
    last_name,
    department,
    positions,
    ee.mails,
    en.numbers
FROM geography.employee AS emp
-- INNER JOIN: employees without any e-mail row are not returned
INNER JOIN (
    SELECT
        email_FK,
        GROUP_CONCAT(email, ' ', email_type SEPARATOR ' || ') AS mails
    FROM geography.employee_email
    GROUP BY email_FK
) AS ee
    ON emp.id = ee.email_FK
-- LEFT JOIN: employees without numbers are kept, with numbers = NULL
LEFT JOIN (
    SELECT
        number_FK,
        GROUP_CONCAT(number, ' ', number_type SEPARATOR ' || ') AS numbers
    FROM geography.employee_number
    GROUP BY number_FK
) AS en
    ON emp.id = en.number_FK
;


Related Topics



Leave a reply



Submit