FWIW, this is a CSV parser with testmethods that I located from a blog post (which no longer exists) - There could easily be better ways of doing this but it handles embedded commas and double quotes. Code is ca. 2011. I've used this before
public without sharing class CsvReader {
// Csv Reader -- courtesy of http://blog.nicocrm.com/2011/03/06/parse-csv-file-in-salesforce-apex/
//
// Usage: Call Constructor with entire Csv Stream then call readLine to get array of string tokens for each line; first row will be header row. readLine returns null at end of stream
private String delim = ',';
private String[] buffer; // the input data for entire CSV file
private Integer maxInputLenToAvoidRegex;
// ------------------------------
// Constructors
// ------------------------------
public CsvReader(String data){
this.buffer = (data == null ? new List<String>() : data.split('\n'));
}
public CsvReader(String data, String delim){
this.buffer = (data == null ? new List<String>() : data.split('\n'));
this.delim = delim;
}
public CsvReader(String data, Boolean avoidRegexLimit, Integer maxInputLenToAvoidRegex) {
// avoidRegexLimit -- apparently, APEX throws an exception on xx.split('\n') when applied to a large file, so, to work around, build the buffer through recursive splitting
// reference: http://boards.developerforce.com/t5/Apex-Code-Development/Pattern-and-Matcher-Question/m-p/135986
this.maxInputLenToAvoidRegex= maxInputLenToAvoidRegex != null ? maxInputLenToAvoidRegex : 262144;
if (!avoidRegexLimit) this.buffer = (data == null ? new List<String>() : data.split('\n'));
else
if (data != null && avoidRegexLimit) {
Integer pivot = data.indexOf('\n',Integer.valueOf(Math.floor(data.length() / 2)) ); // determine first split point
if (pivot > this.maxInputLenToAvoidRegex) {
this.buffer = new List<String> ();
divideStringIntoLines(data);
}
else
this.buffer = data.split('\n');
}
else // null data
this.buffer = new List<String> ();
}
public String[] getBuffer() {return this.buffer;}
// -------------------------------------
// divideStringIntoLines (Recursive) - avoids regex too complicated exception on split('\n') when string is large
// -------------------------------------
private void divideStringIntoLines(String inVal) {
if (inVal == null || inVal.length() == 0) return;
Integer pivot = inVal.indexOf('\n',Integer.valueOf(Math.floor(inVal.length() / 2)) ); // determine if inVal is too large
String left;
String right;
/* System.debug(LoggingLevel.INFO,'divideStringIntoLines, inVal=' + inVal +
' \npivot =' + pivot); */
if (pivot != -1) {
left = inVal.substring(0,pivot);
right = inVal.substring(pivot);
/* System.debug(LoggingLevel.INFO,
' \nleft =' + left +
' \nright =' + right); */
if (pivot < this.maxInputLenToAvoidRegex) {
List<String> leftLines = left.split('\n');
List<String> rightLines = right.split('\n');
for (String sl: leftLines)
if (sl.length() > 0) this.buffer.add(sl);
for (String sr: rightLines)
if (sr.length() > 0) this.buffer.add(sr);
}
else { // recurse as inVal's pivot is bigger than size that regex split can handle
divideStringIntoLines(left);
divideStringIntoLines(right);
}
}
else {
List<String> neitherLeftRightLines = inVal.split('\n');
for (String s: neitherLeftRightLines)
if (s.length() > 0) this.buffer.add(s);
}
}
// -------------------------------
// readLine - returns array of csv tokens as strings; reads through buffer, removing from buffer as each line is located in toto. Return null if end of stream.
// -------------------------------
public String[] readLine(){
if(this.buffer.size() == 0) return null;
String line = this.buffer.remove(0); // grab first part of stream up to newline; remove from buffer
String[] parts = new String[] {}; // result list of tokens for one line
while(line != ''){
Integer next = 0;
if(line.startsWith('"')){
line = line.substring(1); // strip initial "
Integer quoteIndex = findQuote(line, 0); // Look for closing " on same line
while(quoteIndex == -1){ // not found, we must have a newline within a quoted token
if(buffer.size() == 0){
// EOT!
quoteIndex = line.length();
}
else {
// grab the next line and look to see if closing " can be found
Integer skip = line.length();
line += '\n' + this.buffer.remove(0);
quoteIndex = findQuote(line, skip);
}
}
// we have a quoted token, advance to comma
next = quoteIndex + 1;
parts.add(line.substring(0, quoteIndex).replace('""', '"'));
}
else { // non-quoted token, token end is at delim
next = line.indexOf(this.delim, next);
if(next == -1)
next = line.length();
// NB in Substring, "endindex" is the index of the character AFTER the last index to get
parts.add(line.substring(0, next));
}
if(next == line.length() - 1)
// case of a terminating comma.
parts.add('');
line = next < line.length() ? line.substring(next+1) : '';
}
if(parts.size() == 0)
// empty string - we still want to return something...
parts.add('');
return parts;
}
static private Pattern quotePattern = Pattern.compile('(?<!")"(?!")');
// -------------------------------------------------
// Helper: findQuote - find next quote " in line
private Integer findQuote(String line, Integer skip){
Matcher m = quotePattern.matcher(line);
m.region(skip, m.regionEnd());
if(!m.find())
return -1;
return m.start();
}
static testmethod void testSplitCsvSimple(){
String line = 'abc,efg';
String[] splitted = new CsvReader(line).readLine();
System.assertEquals(2, splitted.size());
System.assertEquals('efg', splitted[1]);
System.assertEquals('abc', splitted[0]);
}
static testmethod void testSplitCsvEOL(){
String line = 'abc,';
String[] splitted = new CsvReader(line).readLine();
System.assertEquals(2, splitted.size());
System.assertEquals('', splitted[1]);
System.assertEquals('abc', splitted[0]);
}
static testmethod void testSplitCsvQuotedSimple(){
String line = '"abc,def"';
String[] splitted = new CsvReader(line).readLine();
System.assertEquals('abc,def', splitted[0]);
}
static testmethod void testSplitCsvQuoted(){
String line = '"abc,def",gh"i,"jk""l",""';
String[] splitted = new CsvReader(line).readLine();
System.assertEquals(4, splitted.size());
System.assertEquals('gh"i', splitted[1]);
System.assertEquals('abc,def', splitted[0]);
System.assertEquals('jk"l', splitted[2]);
System.assertEquals('', splitted[3]);
}
static testmethod void testSplitCsvQuotedWithNewLine(){
String line = '"abc,def\nBoo\nBoo",Test';
CsvReader reader = new CsvReader(line);
String[] splitted = reader.readLine();
System.assertEquals('abc,def\nBoo\nBoo', splitted[0]);
System.assertEquals('Test', splitted[1]);
System.assertEquals(null, reader.readLine());
}
static testmethod void testSplitCsvQuotedWithEOT(){
String line = '"abc,def\nBoo';
CsvReader reader = new CsvReader(line);
String[] splitted = reader.readLine();
System.assertEquals('abc,def\nBoo', splitted[0]);
System.assertEquals(null, reader.readLine());
}
static testmethod void testTabDelim(){
String line = 'abc\tdef';
CsvReader reader = new CsvReader(line, '\t');
String[] splitted = reader.readLine();
System.assertEquals('abc', splitted[0]);
}
static testmethod void testSemiColonDelim(){
String line = 'abc;def;';
CsvReader reader = new CsvReader(line, ';');
String[] splitted = reader.readLine();
System.assertEquals('abc', splitted[0]);
System.assertEquals('def', splitted[1]);
}
static testmethod void testEmptyStrings(){
String line = ',,,,';
CsvReader reader = new CsvReader(line);
String[] splitted = reader.readLine();
System.assertEquals(5, splitted.size());
for(String s: splitted){
System.assertEquals('', s);
}
}
// make sure we still get a result even if the source is empty...
static testmethod void testEmptyString(){
String line = '';
CsvReader reader = new CsvReader(line);
String[] splitted = reader.readLine();
System.assertEquals(1, splitted.size());
System.assertEquals('', splitted[0]);
}
// test that avoidRegexException recursive split works - lines reassembled in correct order, nothing lost
static testMethod void testAvoidARegex() {
// test 00 - simple split with pivot len = 4
String[] expRes = new List<String> {'00','01','02','03','04','05','06','07','08','09','10','11'};
String line = '00\n01\n02\n03\n04\n05\n06\n07\n08\n09\n10\n11';
CsvReader reader = new CsvReader(line,true,4);
String[] res = reader.getBuffer();
for (Integer i = 0; i < res.size(); i++) {
System.debug(LoggingLevel.INFO,'[' + i +'] ' + res[i]);
System.assertEquals(expRes[i], res[i]);
}
// test 01 - simple split with pivot len = 3 - to hit the \n
reader = new CsvReader(line,true,3);
res = reader.getBuffer();
for (Integer i = 0; i < res.size(); i++) {
System.debug(LoggingLevel.INFO,'[' + i +'] ' + res[i]);
System.assertEquals(expRes[i], res[i]);
}
// test 02 - simple split with pivot len = 1 - to hit everychar
reader = new CsvReader(line,true,1);
res = reader.getBuffer();
for (Integer i = 0; i < res.size(); i++) {
System.debug(LoggingLevel.INFO,'[' + i +'] ' + res[i]);
System.assertEquals(expRes[i], res[i]);
}
// test 03 - change length of each line
expRes = new List<String> {'000','01','002','03','0004','05','000006','07','08','009','10','11'};
line = '000\n01\n002\n03\n0004\n05\n000006\n07\n08\n009\n10\n11';
reader = new CsvReader(line,true,5);
res = reader.getBuffer();
for (Integer i = 0; i < res.size(); i++) {
System.debug(LoggingLevel.INFO,'[' + i +'] ' + res[i]);
System.assertEquals(expRes[i], res[i]);
}
// test 04 - use very large pivot > file size
reader = new CsvReader(line,true,null);
res = reader.getBuffer();
for (Integer i = 0; i < res.size(); i++) {
System.debug(LoggingLevel.INFO,'[' + i +'] ' + res[i]);
System.assertEquals(expRes[i], res[i]);
}
}
}