What is semi-join in database?
Simple example. Let's select students with grades using left outer join:
SELECT DISTINCT s.id
FROM students s
LEFT JOIN grades g ON g.student_id = s.id
WHERE g.student_id IS NOT NULL
Now the same with left semi-join:
SELECT s.id
FROM students s
WHERE EXISTS (SELECT 1 FROM grades g
WHERE g.student_id = s.id)
The latter is generally more efficient (depending on concrete DBMS and query optimizer).
what is the difference between an anti-join and an anti semi join?
Anti-semijoin is anti join.
The antijoin can also be defined as the complement of the semijoin, as follows:
R ▷ S = R − R ⋉ S
Given this, the antijoin is sometimes called the anti-semijoin, and the antijoin operator is sometimes written as semijoin symbol with a bar above it, instead of ▷.
See https://en.wikipedia.org/wiki/Relational_algebra#Antijoin_.28.E2.96.B7.29
How to translate a simple semijoin query from a SQL query into JavaScript?
Some preliminary remarks:
The first query has a
NOT NULL
condition in itsWHERE
clause which you haven't specifically translated in your JS code. In fact, such a condition annihilates the special effect of the outer join, making it act as an inner join. So really the first query is:SELECT DISTINCT s.id
FROM students s
INNER JOIN grades g ON g.student_id = s.idThe desired output should really be a 2-dimensional array, because in general an SQL statement can select multiple expressions. So each output row could be an array in itself. In this case, I would expect the output to be
[[1], [2], [3]]
It is not really possible to have an exact translation of how a query is executed by a database engine, because database engines typically inspect tables and indexes to decide how exactly the query will be executed, and some of this may happen in real time, meaning that the same query may even be executed differently when the number of records has changed significantly in one of the involved tables.
The way the query will be executed (order of accessing the tables, which join to perform first, which index to use or not use, ...) is called the execution plan. This is also in line with the
original spirit of the SQL language: it was supposed to let the user
say what they want, and not how to do it.
Having said that, this question made me think of popular libraries that allow one to express queries with some hybrid syntax, where SQL clauses can be chained together via method calls.
So for instance, your two queries could then be written like this:
res = select("s.id")
.distinct()
.from(students, "s")
.from(grades, "g")
.where("g.student_id", "=", "s.id")
.exec();
And:
res = select("s.id")
.from(students, "s")
.whereExists(
select(1)
.from(grades, "g")
.where("g.student_id", "=", "s.id")
)
.exec();
Here is an implementation in JavaScript that allows the above expressions to work. It only supports a tiny subset of SQL, just enough to be able to tackle the constructs used in these two SQL statements:
class Query {
constructor() {
this.tables = {};
this.selections = [];
this.conditions = [];
this.exists = [];
this.isDistinct = false;
}
static parseField(field) {
try { // Try to interpret the argument as a literal value (like 1)
let value = JSON.parse(field);
return {value};
} catch(e) { // Otherwise interpret it as table.field
let [table, column] = field.split(".");
return {table, column};
}
}
distinct() {
this.isDistinct = true;
return this;
}
from(records, alias) {
this.tables[alias] = records;
return this;
}
where(fieldA, operator, fieldB) {
let {table: tableA, column: columnA} = Query.parseField(fieldA);
let {table: tableB, column: columnB} = Query.parseField(fieldB);
this.conditions.push({tableA, columnA, operator, tableB, columnB});
return this;
}
whereExists(subquery) {
this.exists.push({subquery, not: false});
return this;
}
whereNotExists(subquery) {
this.exists.push({subquery, not: true});
return this;
}
exec(parentRecords={}) {
// Perform cartesian product (by lack of index)
let max = Object.values(this.tables).reduce((size, {length}) => size * length, 1);
let results = [];
for (let i = 0; i < max; i++) {
let k = i;
// Select a record from each table
let records = {...parentRecords}; // Allow access to parent query
for (let alias in this.tables) {
let recId = k % this.tables[alias].length;
k = (k - recId) / this.tables[alias].length;
records[alias] = this.tables[alias][recId];
}
// Apply conditions
let include = this.conditions.every(({tableA, columnA, operator, tableB, columnB}) => {
let a = records[tableA][columnA];
let b = records[tableB][columnB];
if (operator == "=") return a === b;
if (operator == "<") return a < b;
if (operator == ">") return a > b;
// ...etc
}) && this.exists.every(({subquery, not}) => { // Where (NOT) EXIST
return not === !subquery.exec(records).length;
});
if (include) {
// Apply SELECT
results.push(this.selections.map(({value, table, column}) => value ?? records[table][column]));
}
}
// Apply DISTINCT
if (this.isDistinct) {
results = [...new Map(results.map(result => [JSON.stringify(result), result])).values()];
}
return results;
}
}
function select(...fields) {
let query = new Query;
query.selections = fields.map(Query.parseField);
return query;
}
// Example run
const students = [
{ id: 1 },
{ id: 2 },
{ id: 3 },
{ id: 4 },
{ id: 5 },
{ id: 6 },
]
const grades = [
{ id: 1, student_id: 1 },
{ id: 2, student_id: 1 },
{ id: 3, student_id: 1 },
{ id: 4, student_id: 1 },
{ id: 5, student_id: 1 },
{ id: 6, student_id: 1 },
{ id: 10, student_id: 2 },
{ id: 20, student_id: 2 },
{ id: 30, student_id: 2 },
{ id: 40, student_id: 2 },
{ id: 50, student_id: 2 },
{ id: 60, student_id: 2 },
{ id: 100, student_id: 3 },
{ id: 200, student_id: 3 },
{ id: 300, student_id: 3 },
{ id: 400, student_id: 3 },
{ id: 500, student_id: 3 },
{ id: 600, student_id: 3 },
]
let res1 = select("s.id")
.distinct()
.from(students, "s")
.from(grades, "g")
.where("g.student_id", "=", "s.id")
.exec();
console.log(res1);
let res2 = select("s.id")
.from(students, "s")
.whereExists(
select(1)
.from(grades, "g")
.where("g.student_id", "=", "s.id")
)
.exec();
console.log(res2);
Related Topics
How to Delete a Fixed Number of Rows with Sorting in Postgresql
How to Pass a List as a Parameter in a Stored Procedure
Is There a SQL Implementation of Pbkdf2
Automate Version Number Retrieval from .Dtsx Files
When to Denormalize a Database Design
Drop Function Without Knowing the Number/Type of Parameters
How to Get Return Value of a Stored Procedure
Should I Use SQL_Variant Data Type
Select from Table with Varying in List in Where Clause
Find Rows That Have the Same Value on a Column in MySQL
How to Store a List in a Db Column
Closing Connection When Using Dapper
SQL Speed Up Performance of Insert
How to Find a Table Having a Specific Column in Postgresql
How to Create a Pivot Query in SQL Server Without Aggregate Function