Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 44 additions & 69 deletions papaparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -1097,11 +1097,8 @@ License: MIT
}

var parserConfig = copy(_config);
if (_config.preview && _config.header)
parserConfig.preview++; // to compensate for header row

_input = input;
_parser = new Parser(parserConfig);
_parser = new Parser(parserConfig, _fields);
_results = _parser.parse(_input, baseIndex, ignoreLastRow);
processResults();
return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
Expand Down Expand Up @@ -1178,8 +1175,9 @@ License: MIT
});
}

if (needsHeaderRow())
fillHeaderFields();
if (needsHeaderRow() && _results.meta.fields) {
_fields = _fields.concat(_results.meta.fields);
}

return applyHeaderAndDynamicTypingAndTransformation();
}
Expand All @@ -1189,31 +1187,6 @@ License: MIT
return _config.header && _fields.length === 0;
}

/**
 * Fills _fields with the header names found in _results.
 * Each header cell is first passed through _config.transformHeader
 * (with its column index) when that option is a function.
 * Does nothing when there are no results yet.
 */
function fillHeaderFields()
{
	if (!_results)
		return;

	var rows = _results.data;

	// Runs one header cell through the optional transform hook and records it.
	var collectHeader = function(name, index)
	{
		var transformed = isFunction(_config.transformHeader)
			? _config.transformHeader(name, index)
			: name;
		_fields.push(transformed);
	};

	// When the first entry is not an array we are in a step, where
	// _results.data is the row itself rather than a list of rows.
	if (!Array.isArray(rows[0]))
	{
		rows.forEach(collectHeader);
		return;
	}

	// Keep consuming rows until at least one field name has been collected.
	var rowIndex = 0;
	while (needsHeaderRow() && rowIndex < rows.length)
	{
		rows[rowIndex].forEach(collectHeader);
		rowIndex++;
	}

	// Drop the leading row so it is not reported as data.
	rows.splice(0, 1);
}

function shouldApplyDynamicTyping(field) {
// Cache function values to avoid calling it for each row
if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
Expand Down Expand Up @@ -1316,7 +1289,7 @@ License: MIT
delimiter: delim,
newline: newline,
preview: 10
}).parse(input);
}, _fields).parse(input);

for (var j = 0; j < preview.data.length; j++) {
if (skipEmptyLines && testEmptyLine(preview.data[j])) {
Expand Down Expand Up @@ -1402,7 +1375,7 @@ License: MIT
}

/** The core parser implements speedy and correct CSV parsing */
function Parser(config)
function Parser(config, _fields)
{
// Unpack the config object
config = config || {};
Expand Down Expand Up @@ -1444,6 +1417,7 @@ License: MIT
// We're gonna need these at the Parser scope
var cursor = 0;
var aborted = false;
var fields = _fields;

this.parse = function(input, baseIndex, ignoreLastRow)
{
Expand All @@ -1466,40 +1440,6 @@ License: MIT
if (!input)
return returnable();

// Rename headers if there are duplicates
if (config.header && !baseIndex)
{
var firstLine = input.split(newline)[0];
var headers = firstLine.split(delim);
var separator = '_';
var headerMap = [];
var headerCount = {};
var duplicateHeaders = false;

for (var j in headers) {
var header = headers[j];
if (isFunction(config.transformHeader))
header = config.transformHeader(header, j);
var headerName = header;

var count = headerCount[header] || 0;
if (count > 0) {
duplicateHeaders = true;
headerName = header + separator + count;
}
headerCount[header] = count + 1;
// In case it already exists, we add more separators
while (headerMap.includes(headerName)) {
headerName = headerName + separator + count;
}
headerMap.push(headerName);
}
if (duplicateHeaders) {
var editedInput = input.split(newline);
editedInput[0] = headerMap.join(delim);
input = editedInput.join(newline);
}
}
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
{
var rows = input.split(newline);
Expand Down Expand Up @@ -1707,10 +1647,40 @@ License: MIT

/**
 * Handles one completed row and records the cursor position reached.
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so the
 * unconditional push directly below appears to be the pre-change line shown
 * next to its replacement (the if/else) — confirm against the real file.
 */
function pushRow(row)
{
data.push(row);
// In header mode, while no field names are known yet, the row is handed to
// buildHeaders instead of being stored as data.
if (config.header && (fields.length === 0))
buildHeaders(row);
else
data.push(row);
// Snapshot the current cursor value into lastCursor.
lastCursor = cursor;
}

/**
 * Derives the field names from a raw header row and stores them in `fields`.
 *
 * Each cell is passed through config.transformHeader(header, index) when that
 * option is a function; `index` is the numeric column position. Repeated
 * names are made unique by appending '_' plus the number of earlier
 * occurrences (e.g. A, A, A -> A, A_1, A_2). If a generated name still
 * collides with one already emitted, further '_<count>' suffixes are added.
 *
 * @param {Array} headers - cells of the header row, in column order
 */
function buildHeaders(headers) {
	// Duplicate headers will have '_x' appended to them.
	var separator = '_';
	var headerMap = [];
	// Null-prototype object: header names such as 'constructor' or 'toString'
	// must not pick up inherited Object.prototype members as their count.
	var headerCount = Object.create(null);

	// Index loop rather than for...in so transformHeader receives a number,
	// not a string key, and inherited enumerable properties are never visited.
	for (var j = 0; j < headers.length; j++) {
		var header = headers[j];
		if (isFunction(config.transformHeader))
			header = config.transformHeader(header, j);
		var headerName = header;

		var count = headerCount[header] || 0;
		if (count > 0) {
			headerName = header + separator + count;
		}
		headerCount[header] = count + 1;
		// In case it already exists, we add more separators
		while (headerMap.includes(headerName)) {
			headerName = headerName + separator + count;
		}
		headerMap.push(headerName);
	}

	fields = headerMap;
}

/**
* checks if there are extra spaces after closing quote and given index without any text
* if Yes, returns the number of spaces
Expand Down Expand Up @@ -1761,7 +1731,7 @@ License: MIT
/** Returns an object with the results, errors, and meta. */
function returnable(stopped)
{
return {
var results = {
data: data,
errors: errors,
meta: {
Expand All @@ -1772,6 +1742,11 @@ License: MIT
cursor: lastCursor + (baseIndex || 0)
}
};

if (config.header && fields.length > 0)
results.meta.fields = fields.concat([]);

return results;
}

/** Executes the user's step function and resets data & errors. */
Expand Down
62 changes: 55 additions & 7 deletions tests/test-cases.js
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes to existing tests were required to reflect that:

  • Parser now requires an array as its second argument for its constructor
  • Parser.parse() no longer returns the header row as data

Original file line number Diff line number Diff line change
Expand Up @@ -591,34 +591,43 @@ var CORE_PARSER_TESTS = [
input: 'A,A,A,A\n1,2,3,4',
config: { header: true },
expected: {
data: [['A', 'A_1', 'A_2', 'A_3'], ['1', '2', '3', '4']],
errors: []
data: [['1', '2', '3', '4']],
errors: [],
meta: {
fields: ['A', 'A_1', 'A_2', 'A_3']
}
}
},
{
description: "Duplicate header names with headerTransform",
input: 'A,A,A,A\n1,2,3,4',
config: { header: true, transformHeader: function(header) { return header.toLowerCase(); } },
expected: {
data: [['a', 'a_1', 'a_2', 'a_3'], ['1', '2', '3', '4']],
errors: []
data: [['1', '2', '3', '4']],
errors: [],
meta: {
fields: ['a', 'a_1', 'a_2', 'a_3']
}
}
},
{
description: "Duplicate header names existing column",
input: 'c,c,c,c_1\n1,2,3,4',
config: { header: true },
expected: {
data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']],
errors: []
data: [['1', '2', '3', '4']],
errors: [],
meta: {
fields: ['c', 'c_1', 'c_2', 'c_1_0']
}
}
},
];

describe('Core Parser Tests', function() {
function generateTest(test) {
(test.disabled ? it.skip : it)(test.description, function() {
var actual = new Papa.Parser(test.config).parse(test.input);
var actual = new Papa.Parser(test.config, []).parse(test.input);
assert.deepEqual(actual.errors, test.expected.errors);
assert.deepEqual(actual.data, test.expected.data);
});
Expand Down Expand Up @@ -2674,6 +2683,45 @@ var CUSTOM_TESTS = [
});
}
},
{
description: "Pause and resume works with headers and duplicate fields (Regression Test for Bug #985)",
expected: [[
["Column 1", "Column 2", "Column 3", "Column 4"],
["Column 1", "Column 2", "Column 3", "Column 4"],
], [
{ "Column 1": "R1C1", "Column 2": "", "Column 3": "R1C3", "Column 4": "" },
{ "Column 1": "R2C1", "Column 2": "", "Column 3": "", "Column 4": "" },
]],
run: function(callback) {
var inputString = [
"Column 1,Column 2,Column 3,Column 4",
"R1C1,,R1C3,",
"R2C1,,,"
].join("\n");
var output = [];
var dataRows = [];
var headerResults = [];
Papa.parse(inputString, {
header: true,
step: function(results, parser) {
if (results)
{
headerResults.push(results.meta.fields);
parser.pause();
parser.resume();
if (results.data) {
dataRows.push(results.data);
}
}
},
complete: function() {
output.push(headerResults);
output.push(dataRows);
callback(output);
}
});
}
},
];

describe('Custom Tests', function() {
Expand Down