feat(csv-loader): strip comments before parsing CSV
Pre-filter comment lines from the content before passing it to `csv-parse`. This prevents quote parsing errors when comment lines contain double quotes and simplifies the record filtering logic.
This commit is contained in:
parent
5a1627c6f1
commit
9a2db5edb6
|
|
@ -40,6 +40,24 @@ describe("parseCsv - basic parsing", () => {
|
|||
expect(result.data[1]).toEqual({ id: 2, count: 3, price: 4.5 });
|
||||
});
|
||||
|
||||
// Regression test: a leading comment line that contains double quotes must not
// break parsing, and non-ASCII ids must come back unchanged.
it("should parse CSV with non-ASCII characters and comments", () => {
|
||||
const csv = [
|
||||
// Comment line with embedded double quotes; it must be dropped rather than parsed as CSV.
'# id: unique intent state ID (e.g. "仙人掌怪-boost")',
|
||||
// Presumably the header row ("id") followed by the schema row ("string") — confirm against parseCsv.
"id",
|
||||
"string",
|
||||
"仙人掌怪-boost",
|
||||
"仙人掌怪-defend",
|
||||
"仙人掌怪-attack",
|
||||
].join("\n");
|
||||
|
||||
const result = parseCsv(csv, { emitTypes: false });
|
||||
|
||||
// Only the three id lines are expected as data; the other three input lines yield no data rows.
expect(result.data).toHaveLength(3);
|
||||
expect(result.data[0]).toEqual({ id: "仙人掌怪-boost" });
|
||||
expect(result.data[1]).toEqual({ id: "仙人掌怪-defend" });
|
||||
expect(result.data[2]).toEqual({ id: "仙人掌怪-attack" });
|
||||
});
|
||||
|
||||
it("should parse CSV with string literal columns (unquoted in CSV)", () => {
|
||||
const csv = [
|
||||
"name,status",
|
||||
|
|
|
|||
|
|
@ -789,34 +789,41 @@ export function parseCsv(
|
|||
const refBaseDir = options.refBaseDir;
|
||||
const defaultPrimaryKey = options.defaultPrimaryKey ?? "id";
|
||||
|
||||
const records = parse(content, {
|
||||
// Pre-strip comment lines from content before passing to csv-parse,
|
||||
// to avoid quote parsing errors in comment lines containing double quotes.
|
||||
const reverseReferences: ReverseReferenceDeclaration[] = [];
|
||||
let filteredContent = content;
|
||||
if (comment) {
|
||||
const lines = content.split(/\r?\n/);
|
||||
const nonCommentLines: string[] = [];
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (trimmed.startsWith(comment)) {
|
||||
const decl = parseReverseReferenceDeclaration(trimmed, comment);
|
||||
if (decl) {
|
||||
reverseReferences.push(decl);
|
||||
}
|
||||
// Skip comment lines
|
||||
} else {
|
||||
nonCommentLines.push(line);
|
||||
}
|
||||
}
|
||||
filteredContent = nonCommentLines.join("\n");
|
||||
}
|
||||
|
||||
const records = parse(filteredContent, {
|
||||
delimiter,
|
||||
quote,
|
||||
escape,
|
||||
bom,
|
||||
// Don't let csv-parse skip comments; we need to parse them for reverse references.
|
||||
// Comment lines are filtered out manually below using the configured comment character.
|
||||
comment: undefined,
|
||||
trim,
|
||||
skip_empty_lines: true,
|
||||
relax_column_count: true,
|
||||
});
|
||||
|
||||
// Filter out comment lines from all records, collecting reverse reference declarations
|
||||
const reverseReferences: ReverseReferenceDeclaration[] = [];
|
||||
const filteredRecords: string[][] = [];
|
||||
for (const row of records) {
|
||||
const firstCell = (row[0] ?? "").trim();
|
||||
if (comment && firstCell.startsWith(comment)) {
|
||||
const decl = parseReverseReferenceDeclaration(firstCell, comment);
|
||||
if (decl) {
|
||||
reverseReferences.push(decl);
|
||||
}
|
||||
// Skip comment lines (whether or not they're reverse ref declarations)
|
||||
continue;
|
||||
}
|
||||
filteredRecords.push(row);
|
||||
}
|
||||
// Comment lines were already filtered out before parsing
|
||||
const filteredRecords = records;
|
||||
|
||||
if (filteredRecords.length < 2) {
|
||||
throw new Error("CSV must have at least 2 rows: headers and schemas");
|
||||
|
|
|
|||
Loading…
Reference in New Issue