Sql: Parse the First, Middle and Last Name from a Fullname Field

SQL: parse the first, middle and last name from a fullname field

Here is a self-contained example, with easily manipulated test data.

With this example, if you have a name with more than three parts, then all the "extra" stuff will get put in the LAST_NAME field. An exception is made for specific strings that are identified as "titles", such as "DR", "MRS", and "MR".

If the middle name is missing, then you just get FIRST_NAME and LAST_NAME (MIDDLE_NAME will be NULL).

You could smash it into a giant nested blob of SUBSTRINGs, but readability is hard enough as it is when you do this in SQL.

Edit-- Handle the following special cases:

1 - The NAME field is NULL

2 - The NAME field contains leading / trailing spaces

3 - The NAME field has > 1 consecutive space within the name

4 - The NAME field contains ONLY the first name

5 - Include the original full name in the final output as a separate column, for readability

6 - Handle a specific list of prefixes as a separate "title" column

SELECT
FIRST_NAME.ORIGINAL_INPUT_DATA
,FIRST_NAME.TITLE
,FIRST_NAME.FIRST_NAME
,CASE WHEN 0 = CHARINDEX(' ',FIRST_NAME.REST_OF_NAME)
THEN NULL --no more spaces? assume rest is the last name
ELSE SUBSTRING(
FIRST_NAME.REST_OF_NAME
,1
,CHARINDEX(' ',FIRST_NAME.REST_OF_NAME)-1
)
END AS MIDDLE_NAME
,SUBSTRING(
FIRST_NAME.REST_OF_NAME
,1 + CHARINDEX(' ',FIRST_NAME.REST_OF_NAME)
,LEN(FIRST_NAME.REST_OF_NAME)
) AS LAST_NAME
FROM
(
SELECT
TITLE.TITLE
,CASE WHEN 0 = CHARINDEX(' ',TITLE.REST_OF_NAME)
THEN TITLE.REST_OF_NAME --No space? return the whole thing
ELSE SUBSTRING(
TITLE.REST_OF_NAME
,1
,CHARINDEX(' ',TITLE.REST_OF_NAME)-1
)
END AS FIRST_NAME
,CASE WHEN 0 = CHARINDEX(' ',TITLE.REST_OF_NAME)
THEN NULL --no spaces @ all? then 1st name is all we have
ELSE SUBSTRING(
TITLE.REST_OF_NAME
,CHARINDEX(' ',TITLE.REST_OF_NAME)+1
,LEN(TITLE.REST_OF_NAME)
)
END AS REST_OF_NAME
,TITLE.ORIGINAL_INPUT_DATA
FROM
(
SELECT
--if the first three characters are in this list,
--then pull it as a "title". otherwise return NULL for title.
CASE WHEN SUBSTRING(TEST_DATA.FULL_NAME,1,3) IN ('MR ','MS ','DR ','MRS')
THEN LTRIM(RTRIM(SUBSTRING(TEST_DATA.FULL_NAME,1,3)))
ELSE NULL
END AS TITLE
--if you change the list, don't forget to change it here, too.
--so much for the DRY prinicple...
,CASE WHEN SUBSTRING(TEST_DATA.FULL_NAME,1,3) IN ('MR ','MS ','DR ','MRS')
THEN LTRIM(RTRIM(SUBSTRING(TEST_DATA.FULL_NAME,4,LEN(TEST_DATA.FULL_NAME))))
ELSE LTRIM(RTRIM(TEST_DATA.FULL_NAME))
END AS REST_OF_NAME
,TEST_DATA.ORIGINAL_INPUT_DATA
FROM
(
SELECT
--trim leading & trailing spaces before trying to process
--disallow extra spaces *within* the name
REPLACE(REPLACE(LTRIM(RTRIM(FULL_NAME)),' ',' '),' ',' ') AS FULL_NAME
,FULL_NAME AS ORIGINAL_INPUT_DATA
FROM
(
--if you use this, then replace the following
--block with your actual table
SELECT 'GEORGE W BUSH' AS FULL_NAME
UNION SELECT 'SUSAN B ANTHONY' AS FULL_NAME
UNION SELECT 'ALEXANDER HAMILTON' AS FULL_NAME
UNION SELECT 'OSAMA BIN LADEN JR' AS FULL_NAME
UNION SELECT 'MARTIN J VAN BUREN SENIOR III' AS FULL_NAME
UNION SELECT 'TOMMY' AS FULL_NAME
UNION SELECT 'BILLY' AS FULL_NAME
UNION SELECT NULL AS FULL_NAME
UNION SELECT ' ' AS FULL_NAME
UNION SELECT ' JOHN JACOB SMITH' AS FULL_NAME
UNION SELECT ' DR SANJAY GUPTA' AS FULL_NAME
UNION SELECT 'DR JOHN S HOPKINS' AS FULL_NAME
UNION SELECT ' MRS SUSAN ADAMS' AS FULL_NAME
UNION SELECT ' MS AUGUSTA ADA KING ' AS FULL_NAME
) RAW_DATA
) TEST_DATA
) TITLE
) FIRST_NAME

Splitting a Full Name into First and Last Name

Keeping it short and simple

DECLARE @t TABLE(Fullname varchar(40))
INSERT @t VALUES('John Paul White'),('Peter Smith'),('Thomas')

SELECT
LEFT(Fullname, LEN(Fullname) - CHARINDEX(' ', REVERSE(FullName))) FirstName,
STUFF(RIGHT(FullName, CHARINDEX(' ', REVERSE(FullName))),1,1,'') LastName
FROM
@t

Result:

FirstName  LastName
John Paul White
Peter Smith
Thomas NULL

How To Split full Name into First Name, Middle Name, Last Name and suffix in TSQL


SELECT 

d.First_Name


,CASE WHEN 0 = CHARINDEX(' ',d.REST_OF_NAME)
THEN NULL
ELSE SUBSTRING( ---- finds the middle name from rest of the name
d.REST_OF_NAME
,1
,CHARINDEX(' ',d.REST_OF_NAME)-1
)
END AS Middle_Name

,SUBSTRING(
d.REST_OF_NAME ---- finds the Last name from rest of the name
,1 + CHARINDEX(' ', d.REST_OF_NAME)
,LEN( d.REST_OF_NAME)
) AS Last_Name

,d.Suffix
,d.CUSTOMER_NUMBER
,D.Orignal_Data_String
from
(SELECT c.Suffix,

CASE WHEN 0 = CHARINDEX(' ',c.Remainding_Name_Part)
THEN c.Remainding_Name_Part
ELSE SUBSTRING( ---- substring first name fro rest of the name from reminding part of the name
c.Remainding_Name_Part
,1
,CHARINDEX(' ',c.Remainding_Name_Part)-1
)
END AS First_Name
,CASE WHEN 0 = CHARINDEX(' ',c.Remainding_Name_Part)
THEN NULL
ELSE SUBSTRING(
c.Remainding_Name_Part
,CHARINDEX(' ',c.Remainding_Name_Part)+1 ------ substring rest of the name after substracting firstname from the remainding partof the name
,LEN(c.Remainding_Name_Part)
)
END AS REST_OF_NAME

,c.CUSTOMER_NUMBER
,C.Orignal_Data_String
FROM
(SELECT
CASE WHEN RIGHT(b.Name,2) IN ('IV','Jr','Sr')
THEN LTRIM(RTRIM(RIGHT(b.Name,2))) ----finds suffix in name
WHEN RIGHT(b.Name,3) IN ('III','Esq',' II')
THEN LTRIM(RTRIM(RIGHT(b.Name,3)))

ELSE NULL
END AS [Suffix]
,
CASE WHEN RIGHT(b.Name,2) IN ('IV','Jr','Sr')
THEN LTRIM(RTRIM(LEFT(b.name,LEN(b.name)-2))) ----finds remider part of name after subtrecting suffix
WHEN RIGHT(b.Name,3) IN ('III',' Esq',' II')
THEN LTRIM(RTRIM(LEFT(b.name,LEN(b.name)-3)))

ELSE LTRIM(RTRIM(b.name))
END AS [Remainding_Name_Part]
,B.CUSTOMER_NUMBER
,B.Orignal_Data_String


FROM

(SELECT
REPLACE(REPLACE(LTRIM(RTRIM(a.NAME)),' ',' '),' ',' ') AS [Name] ------ Clears spaces
,A.NAME AS [Orignal_Data_String]
,a.CUSTOMER_NUMBER
FROM
(
SELECT NAME,CUSTOMER_NUMBER ------ finds the customers
FROM [FIS_CORE_FEEDS_DM].[dbo].[FIS_DAILY_CUST_TABLE]
WHERE CUSTOMER_TYPE !='O'
)A
)B
)C
)D

Split Full Name with Format: {Last, First Middle} Comprehensive Cases

Please note the following:

  1. Always request normalized data to ensure the highest data quality. I tried to enumerate all possible cases for last, first, and middle name combinations but I'm sure I did not get all of them.
  2. My script requires the format: LastName@DELIMITER1@DELIMITER2FirstName@DELIMITER2MiddleName, but can be easily altered for other formats.
  3. This script does not separate tiles like Dr., or handle suffixes.
  4. Credit to MemKills for the idea of the test data set, which I expanded.

>

DECLARE @DELIMITER1 varchar(1), @DELIMITER2 varchar(1), @MAX_LENGTH int
SET @DELIMITER1 = ','
SET @DELIMITER2 = ' '
SET @MAX_LENGTH = 50

SELECT [Name],
SUBSTRING(Name,1,CHARINDEX(@DELIMITER1,Name) -1) AS LastName, -- Less one char for @DELIMITER1
SUBSTRING(Name,CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH) AS FirstAndMiddle, -- Plus two for @DELIMITER1 and @DELIMITER2
CASE
-- Middle name follows two-name first names like Mary Ann
WHEN LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0
THEN SUBSTRING(Name, LEN(Name) - CHARINDEX(@DELIMITER2, REVERSE(Name))+2, @MAX_LENGTH)
ELSE NULL
END AS MiddleName,

CASE
-- Count the number of @DELIMITER2. Choose the string between the @DELIMITER1 and the final @DELIMITER2.
WHEN LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0
Then SUBSTRING(Name, CHARINDEX(@DELIMITER1,Name)+ 2,
(LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH))
- LEN(SUBSTRING(Name, LEN(Name) - CHARINDEX(@DELIMITER2, REVERSE(Name))+2, @MAX_LENGTH))))
ELSE SUBSTRING(Name,CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH)
END AS FirstName
FROM
(
SELECT [Name] = 'Zzz, A' UNION ALL
SELECT 'de Zzz, Aaa' UNION ALL
SELECT 'Zzz, Aaaa' UNION ALL
SELECT 'Zzz, A B' UNION ALL
SELECT 'Zzz, Aaaa Bbbb' UNION ALL
SELECT 'de Zzz, Aaaa' UNION ALL
SELECT 'de Zzz, Aaaa B' UNION ALL
SELECT 'van Zzz, Aaaa B' UNION ALL
SELECT 'Yyy-Zzz, Aaaa B' UNION ALL
SELECT 'd''Zzz, Aaaa B' UNION ALL
SELECT 'Zzz, Aaaa Bbbb C' UNION ALL
SELECT 'Zzz, Aaaa Bbbb Cccc'
) AS X


Related Topics



Leave a reply



Submit