Parsing Spanish names can be tricky because there may be an optional second given name, compound surnames such as "del Bosque" or "de la Hoya", vowels with accent marks, and the ñ. The following JavaScript is capable of parsing a full Spanish name, provided it is written with the correct upper and lower case. It returns an object containing:
- name: 1 or 2 main names
- lastName: the main lastname
- secondLastName: The second lastname
The code is:
/**
 * Splits a Spanish full name into given name(s) and up to two surnames.
 *
 * Matching is case-sensitive: a token is a capitalized word, optionally
 * preceded by up to two lowercase particles of 1-3 letters taken from the
 * set "aeodlsz" (e.g. "de la", "del"). Anything else, such as the
 * conjunction "y", is skipped.
 *
 * @param {string} input - Full name, e.g. "José Julian Martí Pérez".
 * @returns {{name?: string, lastName?: string, secondLastName?: string}}
 *   An empty object when the input is empty; otherwise all three keys are
 *   set, using "" for the parts that are not present.
 */
function parseName(input) {
  const fullName = input || "";
  const result = {};
  if (!(fullName.length > 0)) {
    return result;
  }

  // The first alternative of the pattern consumes the whitespace after a
  // capitalized word, so every token is trimmed before it is used.
  const nameTokens = (
    fullName.match(/(?:(?:[A-ZÁ-ÚÑÜ][a-zá-úñü]+){1,2}\s)|(?:(?:[aeodlsz]{1,3}[ ]){0,2}[A-ZÁ-ÚÑÜ][a-zá-úñü]+)/gms) || []
  ).map((token) => token.trim());
  const tokenCount = nameTokens.length;

  // Two given names are assumed only when both surnames are also present.
  result.name = nameTokens.slice(0, tokenCount > 3 ? 2 : 1).join(' ');

  if (tokenCount > 2) {
    // The surnames are always the last two tokens; with more than four
    // tokens, the ones between the given names and the surnames are ignored.
    result.lastName = nameTokens[tokenCount - 2];
    result.secondLastName = nameTokens[tokenCount - 1];
  } else {
    // A lone token is a given name only; it must not be repeated as surname.
    result.lastName = tokenCount === 2 ? nameTokens[1] : "";
    result.secondLastName = "";
  }
  return result;
}
The surnames are required if you are going to specify a second name. Try it out with:
- Vicente Hernández Planes
- Oscar de la Hoya
- José Julian Martí Pérez
- Manuel de Céspedes del Castillo
- Calixto García Íñiguez
Even try out a complex one like
- María de la Caridad del Bosque y Loynáz
Comment your experiences with it.
---- Update 04-2024 ------------------------
I revisited this answer, which I gave 8 years ago, and made a significant change: cleaner code, a better regexp, and a more nuanced result (it now includes a middle name):
/**
 * Splits a Spanish full name into given name, middle name and up to two
 * surnames.
 *
 * Matching is case-sensitive: a token is either one or two capitalized words
 * written together, or a capitalized word preceded by up to two lowercase
 * particles of 1-3 letters taken from the set "aeodlsz" (e.g. "de las",
 * "del"). Anything else, such as the conjunction "y" or a hyphen, is skipped.
 *
 * Only the keys that apply are set on the result:
 *   - 0 tokens: no keys
 *   - 1 token:  name
 *   - 2 tokens: name, lastName
 *   - 3 tokens: name, lastName, secondLastName
 *   - 4+ tokens: name, middleName, lastName, secondLastName, where the
 *     surnames are the last two tokens and middleName joins everything
 *     between the first token and the surnames.
 *
 * @param {string} input - Full name, e.g. "Juan Felipe Hernández Borbón".
 * @returns {{name?: string, middleName?: string, lastName?: string, secondLastName?: string}}
 */
function parseName(input) {
  const fullName = input || "";
  const result = {};
  if (!(fullName.length > 0)) {
    return result;
  }

  const nameTokens = fullName.match(/(?:(?:[A-ZÁ-ÚÑÜ][a-zá-úñü]+){1,2})|(?:(?:[aeodlsz]{1,3}[ ]){0,2}[A-ZÁ-ÚÑÜ][a-zá-úñü]+)/gms) || [];
  switch (nameTokens.length) {
    case 0:
      // Nothing recognisable: return the same empty object as for empty
      // input instead of creating keys that hold undefined.
      break;
    case 1:
      [result.name] = nameTokens;
      break;
    case 2:
      [result.name, result.lastName] = nameTokens;
      break;
    case 3:
      [result.name, result.lastName, result.secondLastName] = nameTokens;
      break;
    default: {
      // Take the surnames from the end so that extra given names do not
      // push the real surnames out of the result.
      const [firstName] = nameTokens;
      const middleNames = nameTokens.slice(1, -2);
      const [firstSurname, secondSurname] = nameTokens.slice(-2);
      result.name = firstName;
      result.middleName = middleNames.join(' ');
      result.lastName = firstSurname;
      result.secondLastName = secondSurname;
    }
  }
  return result;
}
// Demo: parse each sample name in turn and print the resulting object.
[
  "Marianela", // a single given name
  "Peña Ávalos", // non-ASCII letters (ñ, Á), two words
  "Juan Felipe Hernández Borbón", // two given names followed by two surnames
  "Sofía de las Mercedes y Grecia", // particles ("de las") and the conjunction "y"
  "Jorge Luis Borges-Acevedo", // hyphenated (double-barrelled) surname
  "María del Pilar Díaz", // "del" in front of a capitalized word
  "Xóchitl Sánchez", // name starting with "X" (rare but exists)
].forEach((sampleName) => {
  console.log(parseName(sampleName));
});
Note: This regexp is case-sensitive. If you want it to support names written in uppercase, just add the i flag to make it case-insensitive. For instance, change /gms to /gmis.