mholt · landisdesign · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023
diff --git a/papaparse.js b/papaparse.js
@@ -1097,11 +1097,8 @@ License: MIT
 			}
 
 			var parserConfig = copy(_config);
-			if (_config.preview && _config.header)
-				parserConfig.preview++;	// to compensate for header row
-
 			_input = input;
-			_parser = new Parser(parserConfig);
+			_parser = new Parser(parserConfig, _fields);
 			_results = _parser.parse(_input, baseIndex, ignoreLastRow);
 			processResults();
 			return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
@@ -1178,8 +1175,9 @@ License: MIT
 				});
 			}
 
-			if (needsHeaderRow())
-				fillHeaderFields();
+			if (needsHeaderRow() && _results.meta.fields) {
+				_fields = _fields.concat(_results.meta.fields);
+			}
 
 			return applyHeaderAndDynamicTypingAndTransformation();
 		}
@@ -1189,31 +1187,6 @@ License: MIT
 			return _config.header && _fields.length === 0;
 		}
 
-		function fillHeaderFields()
-		{
-			if (!_results)
-				return;
-
-			function addHeader(header, i)
-			{
-				if (isFunction(_config.transformHeader))
-					header = _config.transformHeader(header, i);
-
-				_fields.push(header);
-			}
-
-			if (Array.isArray(_results.data[0]))
-			{
-				for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
-					_results.data[i].forEach(addHeader);
-
-				_results.data.splice(0, 1);
-			}
-			// if _results.data[0] is not an array, we are in a step where _results.data is the row.
-			else
-				_results.data.forEach(addHeader);
-		}
-
 		function shouldApplyDynamicTyping(field) {
 			// Cache function values to avoid calling it for each row
 			if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
@@ -1316,7 +1289,7 @@ License: MIT
 					delimiter: delim,
 					newline: newline,
 					preview: 10
-				}).parse(input);
+				}, _fields).parse(input);
 
 				for (var j = 0; j < preview.data.length; j++) {
 					if (skipEmptyLines && testEmptyLine(preview.data[j])) {
@@ -1402,7 +1375,7 @@ License: MIT
 	}
 
 	/** The core parser implements speedy and correct CSV parsing */
-	function Parser(config)
+	function Parser(config, _fields)
 	{
 		// Unpack the config object
 		config = config || {};
@@ -1444,6 +1417,7 @@ License: MIT
 		// We're gonna need these at the Parser scope
 		var cursor = 0;
 		var aborted = false;
+		var fields = _fields || [];	// fall back to an empty list when the caller omits _fields (old one-argument signature)
 
 		this.parse = function(input, baseIndex, ignoreLastRow)
 		{
@@ -1466,40 +1440,6 @@ License: MIT
 			if (!input)
 				return returnable();
 
-			// Rename headers if there are duplicates
-			if (config.header && !baseIndex)
-			{
-				var firstLine = input.split(newline)[0];
-				var headers = firstLine.split(delim);
-				var separator = '_';
-				var headerMap = [];
-				var headerCount = {};
-				var duplicateHeaders = false;
-
-				for (var j in headers) {
-					var header = headers[j];
-					if (isFunction(config.transformHeader))
-						header = config.transformHeader(header, j);
-					var headerName = header;
-
-					var count = headerCount[header] || 0;
-					if (count > 0) {
-						duplicateHeaders = true;
-						headerName = header + separator + count;
-					}
-					headerCount[header] = count + 1;
-					// In case it already exists, we add more separtors
-					while (headerMap.includes(headerName)) {
-						headerName = headerName + separator + count;
-					}
-					headerMap.push(headerName);
-				}
-				if (duplicateHeaders) {
-					var editedInput = input.split(newline);
-					editedInput[0] = headerMap.join(delim);
-					input = editedInput.join(newline);
-				}
-			}
 			if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
 			{
 				var rows = input.split(newline);
@@ -1707,10 +1647,40 @@ License: MIT
 
 			function pushRow(row)
 			{
-				data.push(row);
+				if (config.header && (fields.length === 0))
+					buildHeaders(row);	// no header names yet: this row supplies them and is not added to data
+				else
+					data.push(row);
 				lastCursor = cursor;
 			}
 
+			function buildHeaders(headers) {
+				// Stores the de-duplicated header names in `fields`; a repeated name gets '_<n>' appended.
+				var separator = '_';
+				var headerMap = [];
+				var headerCount = Object.create(null);	// no inherited keys, so a header named e.g. 'constructor' counts from 0
+				// Index loop (not for...in) so transformHeader receives a numeric column index.
+				for (var j = 0; j < headers.length; j++) {
+					var header = headers[j];
+					if (isFunction(config.transformHeader))
+						header = config.transformHeader(header, j);
+					var headerName = header;
+
+					var count = headerCount[header] || 0;
+					if (count > 0) {
+						headerName = header + separator + count;
+					}
+					headerCount[header] = count + 1;
+					// In case it already exists, we add more separators
+					while (headerMap.includes(headerName)) {
+						headerName = headerName + separator + count;
+					}
+					headerMap.push(headerName);
+				}
+
+				fields = headerMap;
+			}
+
 			/**
              * checks if there are extra spaces after closing quote and given index without any text
              * if Yes, returns the number of spaces
@@ -1761,7 +1731,7 @@ License: MIT
 			/** Returns an object with the results, errors, and meta. */
 			function returnable(stopped)
 			{
-				return {
+				var results = {
 					data: data,
 					errors: errors,
 					meta: {
@@ -1772,6 +1742,11 @@ License: MIT
 						cursor: lastCursor + (baseIndex || 0)
 					}
 				};
+
+				if (config.header && fields.length > 0)
+					results.meta.fields = fields.concat([]);
+
+				return results;
 			}
 
 			/** Executes the user's step function and resets data & errors. */

diff --git a/tests/test-cases.js b/tests/test-cases.js
@@ -591,34 +591,43 @@ var CORE_PARSER_TESTS = [
 		input: 'A,A,A,A\n1,2,3,4',
 		config: { header: true },
 		expected: {
-			data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']],
-			errors: []
+			data: [['1', '2', '3', '4']],
+			errors: [],
+			meta: {
+				fields: ['A', 'A_1', 'A_2', 'A_3']
+			}
 		}
 	},
 	{
 		description: "Duplicate header names with headerTransform",
 		input: 'A,A,A,A\n1,2,3,4',
 		config: { header: true, transformHeader: function(header) { return header.toLowerCase(); } },
 		expected: {
-			data: [['a', 'a_1', 'a_2', 'a_3'], ['1', '2', '3', '4']],
-			errors: []
+			data: [['1', '2', '3', '4']],
+			errors: [],
+			meta: {
+				fields: ['a', 'a_1', 'a_2', 'a_3']
+			}
 		}
 	},
 	{
 		description: "Duplicate header names existing column",
 		input: 'c,c,c,c_1\n1,2,3,4',
 		config: { header: true },
 		expected: {
-			data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']],
-			errors: []
+			data: [['1', '2', '3', '4']],
+			errors: [],
+			meta: {
+				fields: ['c', 'c_1', 'c_2', 'c_1_0']
+			}
 		}
 	},
 ];
 
 describe('Core Parser Tests', function() {
 	function generateTest(test) {
 		(test.disabled ? it.skip : it)(test.description, function() {
-			var actual = new Papa.Parser(test.config).parse(test.input);
+			var actual = new Papa.Parser(test.config, []).parse(test.input);
 			assert.deepEqual(actual.errors, test.expected.errors);
 			assert.deepEqual(actual.data, test.expected.data);
 		});
@@ -2674,6 +2683,45 @@ var CUSTOM_TESTS = [
 			});
 		}
 	},
+	{
+		description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)",
+		expected: [[	// first: meta.fields reported on each step; second: data row reported on each step
+			["Column 1", "Column 2", "Column 3", "Column 4"],
+			["Column 1", "Column 2", "Column 3", "Column 4"],
+		], [
+			{ "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "" },
+			{ "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "" },
+		]],
+		run: function(callback) {
+			var inputString = [
+				"Column 1,Column 2,Column 3,Column 4",
+				"R1C1,,R1C3,",
+				"R2C1,,,"
+			].join("\n");
+			var output = [];
+			var dataRows = [];
+			var headerResults = [];
+			Papa.parse(inputString, {
+				header: true,
+				step: function(results, parser) {
+					if (results)
+					{
+						headerResults.push(results.meta.fields);	// record the field list this step reported
+						parser.pause();	// pause and immediately resume from inside the step callback
+						parser.resume();
+						if (results.data) {
+							dataRows.push(results.data);
+						}
+					}
+				},
+				complete: function() {
+					output.push(headerResults);	// output[0]: field lists, output[1]: data rows
+					output.push(dataRows);
+					callback(output);
+				}
+			});
+		}
+	},
 ];
 
 describe('Custom Tests', function() {