diff --git a/src/csv-loader/loader.ts b/src/csv-loader/loader.ts index cd898de..0bda262 100644 --- a/src/csv-loader/loader.ts +++ b/src/csv-loader/loader.ts @@ -36,242 +36,12 @@ import { generateTypeDefinition } from "./type-gen.js"; import { csvToModule } from "./module-gen.js"; import * as fs from "fs"; import * as path from "path"; - -/** - * Parse a type declaration from a comment line. - * Format: # TypeName := schema - * Examples: - * # Trigger := 'onPlay' | 'onDraw' | 'onDiscard' - * # Effect := [Trigger, @effect, int] - * Returns null if the line is not a type declaration. - */ -function parseTypeDeclaration( - line: string, - commentChar: string = "#", -): { typeName: string; schemaString: string } | null { - const trimmed = line.trim(); - // Must start with the comment character - if (!trimmed.startsWith(commentChar)) return null; - - const content = trimmed.slice(commentChar.length).trim(); - - // Match pattern: TypeName := schema - const match = content.match(/^([A-Z][a-zA-Z0-9]*)\s*:=\s*(.+)$/); - if (!match) return null; - - const [, typeName, schemaString] = match; - return { typeName, schemaString }; -} - -/** - * Expand a type name to its schema by replacing the type name with its schema inline. - * Returns the schema string with type names expanded, or null if not a type name. - */ -function expandTypeName( - schemaString: string, - declaredTypes: Map, -): string | null { - const trimmed = schemaString.trim(); - if (declaredTypes.has(trimmed)) { - return declaredTypes.get(trimmed)!; - } - return null; -} - -/** - * Recursively expand all type name references in a schema string. - * Handles unions, tuples, arrays, and nested structures. 
- */ -function expandSchemaString( - schemaString: string, - declaredTypes: Map, -): string { - let result = schemaString; - - // Keep expanding until no more changes (handles recursive dependencies) - let prev = ""; - while (prev !== result) { - prev = result; - result = expandSchemaInString(result, declaredTypes); - } - - return result; -} - -/** - * Single pass of type name expansion in a schema string. - */ -function expandSchemaInString( - schemaString: string, - declaredTypes: Map, -): string { - // Check if the entire string is a type name - const expanded = expandTypeName(schemaString.trim(), declaredTypes); - if (expanded !== null) { - return expanded; - } - - // Handle union types (recursively expand each member) - if (schemaString.includes("|")) { - // Split by | but respect quotes - const parts = splitByToken(schemaString, "|"); - if (parts.length > 1) { - const expandedParts = parts.map((part) => - expandSchemaInString(part.trim(), declaredTypes), - ); - return expandedParts.join(" | "); - } - } - - // Handle tuple/array syntax [el1; el2; ...] 
or [elements] - // Check if it's a bracketed structure - if (schemaString.startsWith("[") && schemaString.endsWith("]")) { - const inner = schemaString.slice(1, -1); - // Check if it's semicolon-separated (tuple syntax) - if (inner.includes(";")) { - const elements = splitByToken(inner, ";"); - const expandedElements = elements.map((el) => - expandSchemaInString(el.trim(), declaredTypes), - ); - return `[${expandedElements.join("; ")}]`; - } - // Otherwise it's a simple array, expand recursively - return `[${expandSchemaInString(inner, declaredTypes)}]`; - } - - // Check if it's a type name reference (only uppercase start to avoid conflicts with primitives) - const typeNameMatch = schemaString.trim().match(/^[A-Z][a-zA-Z0-9]*$/); - if (typeNameMatch) { - const expanded = expandTypeName(schemaString.trim(), declaredTypes); - if (expanded !== null) { - return expanded; - } - } - - return schemaString; -} - -/** - * Split a string by a token, respecting quoted strings. - */ -function splitByToken(str: string, token: string): string[] { - const result: string[] = []; - let current = ""; - let inQuote: string | null = null; - - for (let i = 0; i < str.length; i++) { - const char = str[i]; - - if (inQuote) { - if (char === inQuote && str[i - 1] !== "\\") { - inQuote = null; - } - current += char; - } else if (char === '"' || char === "'") { - inQuote = char; - current += char; - } else if (char === token && inQuote === null) { - result.push(current); - current = ""; - } else { - current += char; - } - } - - if (current.length > 0 || str.endsWith(token)) { - result.push(current); - } - - return result; -} - -/** - * Resolve type name references within a schema using declared types. - * For example, if "Trigger" is a declared type, references to "Trigger" in - * other schemas will be replaced with the actual Trigger schema definition. 
- */ -function resolveTypeReferences( - schema: Schema, - declaredTypes: Map, -): Schema { - switch (schema.type) { - case "union": - return { - type: "union", - members: schema.members.map((m) => - resolveTypeReferences(m, declaredTypes), - ), - }; - case "tuple": - return { - type: "tuple", - elements: schema.elements.map((el) => ({ - name: el.name, - schema: resolveTypeReferences(el.schema, declaredTypes), - })), - }; - case "array": - return { - type: "array", - element: resolveTypeReferences(schema.element, declaredTypes), - }; - case "reference": - // Don't resolve references to other tables - return schema; - default: - return schema; - } -} - -/** - * Resolve type name references in a type declaration's schema string. - * Called after all type names are known. - */ -function resolveTypeDeclarationSchema( - schemaString: string, - declaredTypes: Map, -): Schema { - const schema = parseSchema(schemaString.trim()); - return resolveTypeReferences(schema, declaredTypes); -} - -/** - * Parse a reverse reference declaration from a comment line. - * Format: # fieldName := ~tableName(foreignKey) - * Returns null if the line is not a reverse reference declaration. 
- */ -function parseReverseReferenceDeclaration( - line: string, - commentChar: string = "#", -): ReverseReferenceDeclaration | null { - const trimmed = line.trim(); - // Must start with the comment character - if (!trimmed.startsWith(commentChar)) return null; - - const content = trimmed.slice(commentChar.length).trim(); - - // Match pattern: fieldName := ~tableName(foreignKey) - const match = content.match(/^(\w+)\s*:=\s*~(\w+)\((\w+)\)(\?)?$/); - if (!match) return null; - - const [, fieldName, tableName, foreignKey, optionalMark] = match; - const isOptional = optionalMark === "?"; - - const schema: ReverseReferenceSchema = { - type: "reverseReference", - tableName, - foreignKey, - isOptional, - }; - - return { - fieldName, - tableName, - foreignKey, - isOptional, - schema, - }; -} +import { + parseTypeDeclaration, + parseReverseReferenceDeclaration, + expandSchemaString, + resolveTypeReferences, +} from "./type-declarations.js"; /** * Parse CSV content string into structured data with schema validation. diff --git a/src/csv-loader/type-declarations.ts b/src/csv-loader/type-declarations.ts new file mode 100644 index 0000000..e3d2737 --- /dev/null +++ b/src/csv-loader/type-declarations.ts @@ -0,0 +1,251 @@ +import type { + Schema, + ReverseReferenceSchema, +} from "../types.js"; +import { parseSchema } from "../parser.js"; + +export interface ReverseReferenceDeclaration { + fieldName: string; + tableName: string; + foreignKey: string; + isOptional: boolean; + schema: ReverseReferenceSchema; +} + +export interface TypeDeclaration { + name: string; + schema: Schema; +} + +/** + * Parse a type declaration from a comment line. + * Format: # TypeName := schema + * Returns null if the line is not a type declaration. 
+ */ +export function parseTypeDeclaration( + line: string, + commentChar: string = "#", +): { typeName: string; schemaString: string } | null { + const trimmed = line.trim(); + // Must start with the comment character + if (!trimmed.startsWith(commentChar)) return null; + + const content = trimmed.slice(commentChar.length).trim(); + + // Match pattern: TypeName := schema + const match = content.match(/^([A-Z][a-zA-Z0-9]*)\s*:=\s*(.+)$/); + if (!match) return null; + + const [, typeName, schemaString] = match; + return { typeName, schemaString }; +} + +/** + * Expand a type name to its schema by replacing the type name with its schema inline. + * Returns the schema string with type names expanded, or null if not a type name. + */ +function expandTypeName( + schemaString: string, + declaredTypes: Map, +): string | null { + const trimmed = schemaString.trim(); + if (declaredTypes.has(trimmed)) { + return declaredTypes.get(trimmed)!; + } + return null; +} + +/** + * Recursively expand all type name references in a schema string. + * Handles unions, tuples, arrays, and nested structures. + */ +export function expandSchemaString( + schemaString: string, + declaredTypes: Map, +): string { + let result = schemaString; + + // Keep expanding until no more changes (handles recursive dependencies) + let prev = ""; + while (prev !== result) { + prev = result; + result = expandSchemaInString(result, declaredTypes); + } + + return result; +} + +/** + * Single pass of type name expansion in a schema string. 
+ */ +function expandSchemaInString( + schemaString: string, + declaredTypes: Map, +): string { + // Check if the entire string is a type name + const expanded = expandTypeName(schemaString.trim(), declaredTypes); + if (expanded !== null) { + return expanded; + } + + // Handle union types (recursively expand each member) + if (schemaString.includes("|")) { + // Split by | but respect quotes + const parts = splitByToken(schemaString, "|"); + if (parts.length > 1) { + const expandedParts = parts.map((part) => + expandSchemaInString(part.trim(), declaredTypes), + ); + return expandedParts.join(" | "); + } + } + + // Handle tuple/array syntax [el1; el2; ...] or [elements] + // Check if it's a bracketed structure + if (schemaString.startsWith("[") && schemaString.endsWith("]")) { + const inner = schemaString.slice(1, -1); + // Check if it's semicolon-separated (tuple syntax) + if (inner.includes(";")) { + const elements = splitByToken(inner, ";"); + const expandedElements = elements.map((el) => + expandSchemaInString(el.trim(), declaredTypes), + ); + return `[${expandedElements.join("; ")}]`; + } + // Otherwise it's a simple array, expand recursively + return `[${expandSchemaInString(inner, declaredTypes)}]`; + } + + // Check if it's a type name reference (only uppercase start to avoid conflicts with primitives) + const typeNameMatch = schemaString.trim().match(/^[A-Z][a-zA-Z0-9]*$/); + if (typeNameMatch) { + const expanded = expandTypeName(schemaString.trim(), declaredTypes); + if (expanded !== null) { + return expanded; + } + } + + return schemaString; +} + +/** + * Split a string by a token, respecting quoted strings. 
+ */ +function splitByToken(str: string, token: string): string[] { + const result: string[] = []; + let current = ""; + let inQuote: string | null = null; + + for (let i = 0; i < str.length; i++) { + const char = str[i]; + + if (inQuote) { + if (char === inQuote && str[i - 1] !== "\\") { + inQuote = null; + } + current += char; + } else if (char === '"' || char === "'") { + inQuote = char; + current += char; + } else if (char === token && inQuote === null) { + result.push(current); + current = ""; + } else { + current += char; + } + } + + if (current.length > 0 || str.endsWith(token)) { + result.push(current); + } + + return result; +} + +/** + * Resolve type name references within a schema using declared types. + * For example, if "Trigger" is a declared type, references to "Trigger" in + * other schemas will be replaced with the actual Trigger schema definition. + */ +export function resolveTypeReferences( + schema: Schema, + declaredTypes: Map, +): Schema { + switch (schema.type) { + case "union": + return { + type: "union", + members: schema.members.map((m) => + resolveTypeReferences(m, declaredTypes), + ), + }; + case "tuple": + return { + type: "tuple", + elements: schema.elements.map((el) => ({ + name: el.name, + schema: resolveTypeReferences(el.schema, declaredTypes), + })), + }; + case "array": + return { + type: "array", + element: resolveTypeReferences(schema.element, declaredTypes), + }; + case "reference": + // Don't resolve references to other tables + return schema; + default: + return schema; + } +} + +/** + * Resolve type name references in a type declaration's schema string. + * Called after all type names are known. + */ +export function resolveTypeDeclarationSchema( + schemaString: string, + declaredTypes: Map, +): Schema { + const schema = parseSchema(schemaString.trim()); + return resolveTypeReferences(schema, declaredTypes); +} + +/** + * Parse a reverse reference declaration from a comment line. 
+ * Format: # fieldName := ~tableName(foreignKey) + * Returns null if the line is not a reverse reference declaration. + */ +export function parseReverseReferenceDeclaration( + line: string, + commentChar: string = "#", +): ReverseReferenceDeclaration | null { + const trimmed = line.trim(); + // Must start with the comment character + if (!trimmed.startsWith(commentChar)) return null; + + const content = trimmed.slice(commentChar.length).trim(); + + // Match pattern: fieldName := ~tableName(foreignKey) + const match = content.match(/^(\w+)\s*:=\s*~(\w+)\((\w+)\)(\?)?$/); + if (!match) return null; + + const [, fieldName, tableName, foreignKey, optionalMark] = match; + const isOptional = optionalMark === "?"; + + const schema: ReverseReferenceSchema = { + type: "reverseReference", + tableName, + foreignKey, + isOptional, + }; + + return { + fieldName, + tableName, + foreignKey, + isOptional, + schema, + }; +} diff --git a/src/type-utils.ts b/src/type-utils.ts new file mode 100644 index 0000000..783497e --- /dev/null +++ b/src/type-utils.ts @@ -0,0 +1,111 @@ +import type { + Schema, + ReferenceSchema, + ReverseReferenceSchema, + UnionSchema, +} from "./types"; + +export function schemaToTypeString( + schema: Schema, + resourceNames?: Map, +): string { + switch (schema.type) { + case "string": + return "string"; + case "number": + case "int": + case "float": + return "number"; + case "boolean": + return "boolean"; + case "stringLiteral": + return `"${schema.value}"`; + case "union": + return schema.members + .map((m) => schemaToTypeString(m, resourceNames)) + .join(" | "); + case "reference": { + const typeName = + resourceNames?.get(schema.tableName) || + schema.tableName.charAt(0).toUpperCase() + schema.tableName.slice(1); + const baseType = schema.isArray ? `${typeName}[]` : typeName; + return schema.isOptional ? 
`${baseType} | null` : baseType; + } + case "reverseReference": { + const typeName = + resourceNames?.get(schema.tableName) || + schema.tableName.charAt(0).toUpperCase() + schema.tableName.slice(1); + const baseType = `${typeName}[]`; + return schema.isOptional ? `${baseType} | null` : baseType; + } + case "array": + if (schema.element.type === "tuple") { + const tupleElements = schema.element.elements.map((el) => { + const typeStr = schemaToTypeString(el.schema, resourceNames); + return el.name ? `${el.name}: ${typeStr}` : typeStr; + }); + return `[${tupleElements.join(", ")}][]`; + } + const elementType = schemaToTypeString(schema.element, resourceNames); + if (schema.element.type === "union") { + return `(${elementType})[]`; + } + return `${elementType}[]`; + case "tuple": + const tupleElements = schema.elements.map((el) => { + const typeStr = schemaToTypeString(el.schema, resourceNames); + return el.name ? `${el.name}: ${typeStr}` : typeStr; + }); + return `[${tupleElements.join(", ")}]`; + default: + return "unknown"; + } +} + +export function createValidator(schema: Schema): (value: unknown) => boolean { + return function validate(value: unknown): boolean { + switch (schema.type) { + case "string": + return typeof value === "string"; + case "number": + return typeof value === "number" && !isNaN(value); + case "int": + return ( + typeof value === "number" && !isNaN(value) && Number.isInteger(value) + ); + case "float": + return typeof value === "number" && !isNaN(value); + case "boolean": + return typeof value === "boolean"; + case "stringLiteral": + return typeof value === "string" && value === schema.value; + case "union": + return schema.members.some((member) => createValidator(member)(value)); + case "tuple": + if (!Array.isArray(value)) return false; + if (value.length !== schema.elements.length) return false; + return schema.elements.every((elementSchema, index) => + createValidator(elementSchema.schema)(value[index]), + ); + case "array": + if 
(!Array.isArray(value)) return false; + return value.every((item) => createValidator(schema.element)(item)); + case "reference": + if (schema.isOptional && value === null) return true; + if (schema.isArray) { + return ( + Array.isArray(value) && value.every((id) => typeof id === "string") + ); + } + return ( + typeof value === "string" || + (Array.isArray(value) && value.every((id) => typeof id === "string")) + ); + case "reverseReference": + if (schema.isOptional && value === null) return true; + return Array.isArray(value); + default: + return false; + } + }; +} diff --git a/src/validator.ts b/src/validator.ts index 6baa273..4a1430a 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -1,540 +1,3 @@ -import type { - Schema, - PrimitiveSchema, - TupleSchema, - ArraySchema, - NamedSchema, - ReferenceSchema, - ReverseReferenceSchema, - StringLiteralSchema, - UnionSchema, -} from "./types"; -import { ParseError } from "./parser"; - -class ValueParser { - private input: string; - private pos: number = 0; - - constructor(input: string) { - this.input = input; - } - - private peek(): string { - return this.input[this.pos] || ""; - } - - private consume(): string { - return this.input[this.pos++] || ""; - } - - private skipWhitespace(): void { - while (this.pos < this.input.length && /\s/.test(this.input[this.pos])) { - this.pos++; - } - } - - private consumeStr(str: string): boolean { - if (this.input.slice(this.pos, this.pos + str.length) === str) { - this.pos += str.length; - return true; - } - return false; - } - - parseValue(schema: Schema, allowOmitBrackets: boolean = false): unknown { - this.skipWhitespace(); - - switch (schema.type) { - case "string": - return this.parseStringValue(); - case "number": - return this.parseNumberValue(); - case "int": - return this.parseIntValue(); - case "float": - return this.parseFloatValue(); - case "boolean": - return this.parseBooleanValue(); - case "stringLiteral": - return this.parseStringLiteralValue(schema); - case 
"union": - return this.parseUnionValue(schema); - case "tuple": - return this.parseTupleValue(schema, allowOmitBrackets); - case "array": - return this.parseArrayValue(schema, allowOmitBrackets); - case "reference": - // Reference values are parsed as strings (IDs) initially, resolved later - return this.parseReferenceValue(schema); - case "reverseReference": - // Reverse references are derived fields, not stored in CSV cells - // They resolve to null at parse time; actual resolution happens in the loader - return null; - default: - throw new ParseError( - `Unknown schema type: ${(schema as { type: string }).type}`, - this.pos, - ); - } - } - - private parseStringValue(): string { - let result = ""; - while (this.pos < this.input.length) { - const char = this.peek(); - - if (char === "\\") { - this.consume(); - const nextChar = this.consume(); - if ( - nextChar === ";" || - nextChar === "[" || - nextChar === "]" || - nextChar === "\\" - ) { - result += nextChar; - } else { - result += "\\" + nextChar; - } - } else if (char === ";" || char === "]") { - break; - } else { - result += this.consume(); - } - } - return result.trim(); - } - - private parseNumberValue(): number { - let numStr = ""; - while (this.pos < this.input.length && /[\d.\-+eE]/.test(this.peek())) { - numStr += this.consume(); - } - const num = parseFloat(numStr); - if (isNaN(num)) { - throw new ParseError("Invalid number", this.pos - numStr.length); - } - return num; - } - - private parseIntValue(): number { - let numStr = ""; - while (this.pos < this.input.length && /[\d.\-+eE]/.test(this.peek())) { - numStr += this.consume(); - } - const num = parseFloat(numStr); - if (isNaN(num)) { - throw new ParseError("Invalid number", this.pos - numStr.length); - } - if (!Number.isInteger(num)) { - throw new ParseError("Expected integer value", this.pos - numStr.length); - } - return num; - } - - private parseFloatValue(): number { - return this.parseNumberValue(); - } - - private parseBooleanValue(): boolean 
{ - if (this.consumeStr("true")) { - return true; - } - if (this.consumeStr("false")) { - return false; - } - throw new ParseError("Expected true or false", this.pos); - } - - private parseStringLiteralValue(schema: StringLiteralSchema): string { - const quote = this.peek(); - - // 支持带引号或不带引号的字符串值 - if (quote === '"' || quote === "'") { - this.consume(); // Consume opening quote - - let value = ""; - while (this.pos < this.input.length) { - const char = this.peek(); - - if (char === "\\") { - this.consume(); - const nextChar = this.consume(); - if ( - nextChar === '"' || - nextChar === "'" || - nextChar === "\\" || - nextChar === ";" - ) { - value += nextChar; - } else { - value += "\\" + nextChar; - } - } else if (char === quote) { - this.consume(); // Consume closing quote - - if (value !== schema.value) { - throw new ParseError( - `Invalid value '"${value}"'. Expected '"${schema.value}"'`, - this.pos, - ); - } - - return value; - } else { - value += this.consume(); - } - } - - throw new ParseError("Unterminated string literal", this.pos); - } else { - // 不带引号的字符串,像普通字符串一样解析 - let value = ""; - while (this.pos < this.input.length) { - const char = this.peek(); - if (char === ";" || char === "]" || char === ")") { - break; - } - value += this.consume(); - } - - value = value.trim(); - - if (value !== schema.value) { - throw new ParseError( - `Invalid value '${value}'. Expected '${schema.value}'`, - this.pos - value.length, - ); - } - - return value; - } - } - - private parseUnionValue(schema: UnionSchema): unknown { - const savedPos = this.pos; - const errors: Error[] = []; - - // Try each union member until one succeeds - for (let i = 0; i < schema.members.length; i++) { - this.pos = savedPos; - try { - return this.parseValue(schema.members[i], false); - } catch (e) { - errors.push(e as Error); - // Continue to next member - } - } - - // If all members fail, throw a descriptive error - throw new ParseError( - `Value does not match any union member. 
Tried ${schema.members.length} alternatives.`, - this.pos, - ); - } - - private parseTupleValue( - schema: TupleSchema, - allowOmitBrackets: boolean, - ): unknown[] { - let hasOpenBracket = false; - - if (this.peek() === "[") { - this.consume(); - hasOpenBracket = true; - } else if (!allowOmitBrackets) { - throw new ParseError("Expected [", this.pos); - } - - this.skipWhitespace(); - - if (this.peek() === "]" && hasOpenBracket) { - this.consume(); - return []; - } - - const result: unknown[] = []; - for (let i = 0; i < schema.elements.length; i++) { - this.skipWhitespace(); - const elementSchema = schema.elements[i]; - - // Try to consume optional name prefix (e.g., "current:") - if (elementSchema.name) { - this.skipWhitespace(); - const savedPos = this.pos; - if (this.consumeStr(`${elementSchema.name}:`)) { - this.skipWhitespace(); - } else { - // Name not found, reset position and continue without name - this.pos = savedPos; - } - } - - result.push(this.parseValue(elementSchema.schema, false)); - this.skipWhitespace(); - - if (i < schema.elements.length - 1) { - if (!this.consumeStr(";")) { - throw new ParseError("Expected ;", this.pos); - } - } - } - - this.skipWhitespace(); - - if (hasOpenBracket) { - if (!this.consumeStr("]")) { - throw new ParseError("Expected ]", this.pos); - } - } - - return result; - } - - private parseArrayValue( - schema: ArraySchema, - allowOmitBrackets: boolean, - ): unknown[] { - let hasOpenBracket = false; - const elementIsTupleOrArray = - schema.element.type === "tuple" || schema.element.type === "array"; - - if (this.pos >= this.input.length || !this.input.trim()) { - return []; - } - - if (this.peek() === "[") { - if (!elementIsTupleOrArray) { - this.consume(); - hasOpenBracket = true; - } else if (this.input[this.pos + 1] === "[") { - this.consume(); - hasOpenBracket = true; - } - } - - if (!hasOpenBracket && !allowOmitBrackets && !elementIsTupleOrArray) { - throw new ParseError("Expected [", this.pos); - } - - 
this.skipWhitespace(); - - if (this.peek() === "]" && hasOpenBracket) { - this.consume(); - return []; - } - - const result: unknown[] = []; - while (true) { - this.skipWhitespace(); - result.push(this.parseValue(schema.element, false)); - this.skipWhitespace(); - - if (!this.consumeStr(";")) { - break; - } - } - - this.skipWhitespace(); - - if (hasOpenBracket) { - if (!this.consumeStr("]")) { - throw new ParseError("Expected ]", this.pos); - } - } - - return result; - } - - private parseReferenceValue( - schema: ReferenceSchema, - ): string | string[] | null { - if (schema.isOptional) { - this.skipWhitespace(); - if (this.pos >= this.input.length) { - return null; - } - } - - if (schema.isArray) { - // Parse array of IDs: [id1; id2; id3] - let hasOpenBracket = false; - if (this.peek() === "[") { - this.consume(); - hasOpenBracket = true; - } - - this.skipWhitespace(); - - if (this.peek() === "]" && hasOpenBracket) { - this.consume(); - return []; - } - - const ids: string[] = []; - while (true) { - this.skipWhitespace(); - // Parse each ID as a string - let id = ""; - while ( - this.pos < this.input.length && - this.peek() !== ";" && - this.peek() !== "]" - ) { - id += this.consume(); - } - ids.push(id.trim()); - this.skipWhitespace(); - - if (!this.consumeStr(";")) { - break; - } - } - - if (hasOpenBracket) { - if (!this.consumeStr("]")) { - throw new ParseError("Expected ]", this.pos); - } - } - - return ids; - } else { - // Parse single ID as string - let id = ""; - while (this.pos < this.input.length) { - const char = this.peek(); - if (char === ";" || char === "]" || char === ",") { - break; - } - id += this.consume(); - } - return id.trim(); - } - } - - getPosition(): number { - return this.pos; - } - - getInputLength(): number { - return this.input.length; - } -} - -export function parseValue(schema: Schema, valueString: string): unknown { - const parser = new ValueParser(valueString.trim()); - const allowOmitBrackets = schema.type === "tuple" || 
schema.type === "array"; - const value = parser.parseValue(schema, allowOmitBrackets); - - if (parser.getPosition() < parser.getInputLength()) { - throw new ParseError("Unexpected input after value", parser.getPosition()); - } - - return value; -} - -export function schemaToTypeString( - schema: Schema, - resourceNames?: Map, -): string { - switch (schema.type) { - case "string": - return "string"; - case "number": - case "int": - case "float": - return "number"; - case "boolean": - return "boolean"; - case "stringLiteral": - return `"${schema.value}"`; - case "union": - return schema.members - .map((m) => schemaToTypeString(m, resourceNames)) - .join(" | "); - case "reference": { - const typeName = - resourceNames?.get(schema.tableName) || - schema.tableName.charAt(0).toUpperCase() + schema.tableName.slice(1); - const baseType = schema.isArray ? `${typeName}[]` : typeName; - return schema.isOptional ? `${baseType} | null` : baseType; - } - case "reverseReference": { - const typeName = - resourceNames?.get(schema.tableName) || - schema.tableName.charAt(0).toUpperCase() + schema.tableName.slice(1); - // Reverse references always resolve to an array (one-to-many) - const baseType = `${typeName}[]`; - return schema.isOptional ? `${baseType} | null` : baseType; - } - case "array": - if (schema.element.type === "tuple") { - const tupleElements = schema.element.elements.map((el) => { - const typeStr = schemaToTypeString(el.schema, resourceNames); - return el.name ? `${el.name}: ${typeStr}` : typeStr; - }); - return `[${tupleElements.join(", ")}][]`; - } - const elementType = schemaToTypeString(schema.element, resourceNames); - if (schema.element.type === "union") { - return `(${elementType})[]`; - } - return `${elementType}[]`; - case "tuple": - const tupleElements = schema.elements.map((el) => { - const typeStr = schemaToTypeString(el.schema, resourceNames); - return el.name ? 
`${el.name}: ${typeStr}` : typeStr; - }); - return `[${tupleElements.join(", ")}]`; - default: - return "unknown"; - } -} - -export function createValidator(schema: Schema): (value: unknown) => boolean { - return function validate(value: unknown): boolean { - switch (schema.type) { - case "string": - return typeof value === "string"; - case "number": - return typeof value === "number" && !isNaN(value); - case "int": - return ( - typeof value === "number" && !isNaN(value) && Number.isInteger(value) - ); - case "float": - return typeof value === "number" && !isNaN(value); - case "boolean": - return typeof value === "boolean"; - case "stringLiteral": - return typeof value === "string" && value === schema.value; - case "union": - return schema.members.some((member) => createValidator(member)(value)); - case "tuple": - if (!Array.isArray(value)) return false; - if (value.length !== schema.elements.length) return false; - return schema.elements.every((elementSchema, index) => - createValidator(elementSchema.schema)(value[index]), - ); - case "array": - if (!Array.isArray(value)) return false; - return value.every((item) => createValidator(schema.element)(item)); - case "reference": - if (schema.isOptional && value === null) return true; - if (schema.isArray) { - return ( - Array.isArray(value) && value.every((id) => typeof id === "string") - ); - } - return ( - typeof value === "string" || - (Array.isArray(value) && value.every((id) => typeof id === "string")) - ); - case "reverseReference": - if (schema.isOptional && value === null) return true; - return Array.isArray(value); - default: - return false; - } - }; -} +// Re-export everything from the split modules +export { parseValue } from "./value-parser"; +export { schemaToTypeString, createValidator } from "./type-utils"; diff --git a/src/value-parser.ts b/src/value-parser.ts new file mode 100644 index 0000000..ddce0dd --- /dev/null +++ b/src/value-parser.ts @@ -0,0 +1,431 @@ +import type { + Schema, + TupleSchema, + 
ArraySchema,
  ReferenceSchema,
  StringLiteralSchema,
  UnionSchema,
} from "./types";
import { ParseError } from "./parser";

/**
 * Recursive-descent parser that converts the text of a single CSV cell into a
 * JavaScript value, driven by a `Schema`.
 *
 * The parser keeps a cursor (`pos`) into `input`. Every `parse*` method starts
 * reading at the cursor and leaves it just past the text it consumed, so
 * composite parsers (tuple, array, union) can chain or backtrack.
 */
class ValueParser {
  /** Characters that may belong to a numeric token: digits, sign, dot, exponent marker. */
  private static readonly NUMERIC_CHAR = /[\d.\-+eE]/;

  /**
   * Grammar a whole numeric token must satisfy. `parseFloat` alone is not
   * enough because it accepts any string with a numeric *prefix*
   * (e.g. "2024-01-15" -> 2024, "1.2.3" -> 1.2).
   */
  private static readonly NUMERIC_TOKEN =
    /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;

  /** Single whitespace character. */
  private static readonly WHITESPACE = /\s/;

  /** Index of the next unread character in `input`. */
  private pos = 0;

  /**
   * @param input Raw text to parse.
   */
  constructor(private readonly input: string) {}

  /** Returns the character at the cursor without consuming it, or "" at end of input. */
  private peek(): string {
    return this.input.charAt(this.pos);
  }

  /** Returns the character at the cursor and advances past it; yields "" at end of input. */
  private consume(): string {
    return this.input.charAt(this.pos++);
  }

  /** Advances the cursor past any run of whitespace. */
  private skipWhitespace(): void {
    while (
      this.pos < this.input.length &&
      ValueParser.WHITESPACE.test(this.input[this.pos])
    ) {
      this.pos++;
    }
  }

  /**
   * Consumes `str` if the input continues with it at the cursor.
   *
   * @returns true when `str` was consumed; false (cursor untouched) otherwise.
   */
  private consumeStr(str: string): boolean {
    if (!this.input.startsWith(str, this.pos)) {
      return false;
    }
    this.pos += str.length;
    return true;
  }

  /**
   * Parses one value of the given schema starting at the cursor.
   *
   * @param schema Schema describing the expected value.
   * @param allowOmitBrackets When true, a tuple or array may be written
   *   without its surrounding `[` `]`.
   * @returns The parsed value; `null` for reverse references.
   * @throws ParseError when the input does not match the schema or the schema
   *   type is not recognised.
   */
  parseValue(schema: Schema, allowOmitBrackets: boolean = false): unknown {
    this.skipWhitespace();

    switch (schema.type) {
      case "string":
        return this.parseStringValue();
      case "number":
        return this.parseNumberValue();
      case "int":
        return this.parseIntValue();
      case "float":
        return this.parseFloatValue();
      case "boolean":
        return this.parseBooleanValue();
      case "stringLiteral":
        return this.parseStringLiteralValue(schema);
      case "union":
        return this.parseUnionValue(schema);
      case "tuple":
        return this.parseTupleValue(schema, allowOmitBrackets);
      case "array":
        return this.parseArrayValue(schema, allowOmitBrackets);
      case "reference":
        // Reference values are parsed as strings (IDs) initially, resolved later
        return this.parseReferenceValue(schema);
      case "reverseReference":
        // Reverse references are derived fields, not stored in CSV cells
        // They resolve to null at parse time; actual resolution happens in the loader
        return null;
      default:
        throw new ParseError(
          `Unknown schema type: ${(schema as { type: string }).type}`,
          this.pos,
        );
    }
  }

  /**
   * Reads a free-form string up to the next unescaped `;` or `]` (or end of
   * input). A backslash before `;`, `[`, `]` or `\` yields that character
   * literally; any other backslash sequence is kept verbatim.
   *
   * @returns The collected text with surrounding whitespace trimmed.
   */
  private parseStringValue(): string {
    let text = "";
    while (this.pos < this.input.length) {
      const ch = this.peek();

      if (ch === ";" || ch === "]") {
        break;
      }

      if (ch !== "\\") {
        text += this.consume();
        continue;
      }

      this.consume(); // the backslash itself
      const escaped = this.consume(); // "" when the backslash is the last character
      const isStructural =
        escaped === ";" ||
        escaped === "[" ||
        escaped === "]" ||
        escaped === "\\";
      text += isStructural ? escaped : "\\" + escaped;
    }
    return text.trim();
  }

  /**
   * Scans the maximal run of numeric characters at the cursor and converts it.
   *
   * The whole token must be a well-formed decimal number; tokens that merely
   * start with one (such as "1-2" or "1.2.3") are rejected rather than
   * silently truncated.
   *
   * @throws ParseError ("Invalid number") positioned at the start of the token.
   */
  private scanNumber(): number {
    const start = this.pos;
    while (
      this.pos < this.input.length &&
      ValueParser.NUMERIC_CHAR.test(this.input[this.pos])
    ) {
      this.pos++;
    }

    const token = this.input.slice(start, this.pos);
    if (!ValueParser.NUMERIC_TOKEN.test(token)) {
      throw new ParseError("Invalid number", start);
    }
    return parseFloat(token);
  }

  /**
   * Parses a number (integer or fractional, optional exponent).
   *
   * @throws ParseError when the token is not a valid number.
   */
  private parseNumberValue(): number {
    return this.scanNumber();
  }

  /**
   * Parses a number and requires it to be an integer.
   *
   * @throws ParseError when the token is not a valid number or is not integral.
   */
  private parseIntValue(): number {
    const start = this.pos;
    const num = this.scanNumber();
    if (!Number.isInteger(num)) {
      throw new ParseError("Expected integer value", start);
    }
    return num;
  }

  /** Parses a float; accepts exactly what `parseNumberValue` accepts. */
  private parseFloatValue(): number {
    return this.parseNumberValue();
  }

  /**
   * Parses the keyword `true` or `false`.
   *
   * @throws ParseError when neither keyword is present at the cursor.
   */
  private parseBooleanValue(): boolean {
    if (this.consumeStr("true")) {
      return true;
    }
    if (this.consumeStr("false")) {
      return false;
    }
    throw new ParseError("Expected true or false", this.pos);
  }

  /**
   * Parses a string that must equal `schema.value`. The value may be written
   * quoted (single or double quotes, with backslash escapes for quotes, `\`
   * and `;`) or unquoted (read up to the next `;`, `]` or `)`, then trimmed).
   *
   * @returns The matched literal.
   * @throws ParseError when the text differs from `schema.value` or a quoted
   *   literal is not terminated.
   */
  private parseStringLiteralValue(schema: StringLiteralSchema): string {
    const quote = this.peek();

    // Both quoted and unquoted spellings of the literal are supported.
    if (quote === '"' || quote === "'") {
      this.consume(); // Consume opening quote

      let value = "";
      while (this.pos < this.input.length) {
        const char = this.peek();

        if (char === "\\") {
          this.consume();
          const nextChar = this.consume();
          if (
            nextChar === '"' ||
            nextChar === "'" ||
            nextChar === "\\" ||
            nextChar === ";"
          ) {
            value += nextChar;
          } else {
            value += "\\" + nextChar;
          }
        } else if (char === quote) {
          this.consume(); // Consume closing quote

          if (value !== schema.value) {
            throw new ParseError(
              `Invalid value '"${value}"'. Expected '"${schema.value}"'`,
              this.pos,
            );
          }

          return value;
        } else {
          value += this.consume();
        }
      }

      throw new ParseError("Unterminated string literal", this.pos);
    }

    // Unquoted literal: read it like a plain string up to the next delimiter.
    const start = this.pos;
    while (this.pos < this.input.length) {
      const char = this.peek();
      if (char === ";" || char === "]" || char === ")") {
        break;
      }
      this.pos++;
    }

    const value = this.input.slice(start, this.pos).trim();

    if (value !== schema.value) {
      // Report where the value began; deriving the offset from the trimmed
      // length would be off whenever the raw text has trailing whitespace.
      throw new ParseError(
        `Invalid value '${value}'. Expected '${schema.value}'`,
        start,
      );
    }

    return value;
  }

  /**
   * Parses a union by trying each member schema in declaration order from the
   * same starting position and returning the first one that parses.
   *
   * @throws ParseError positioned at the start of the value when no member
   *   matches; the cursor is left at that start position.
   */
  private parseUnionValue(schema: UnionSchema): unknown {
    const start = this.pos;

    for (const member of schema.members) {
      this.pos = start;
      try {
        return this.parseValue(member, false);
      } catch (_err) {
        // This alternative did not match; backtrack and try the next one.
      }
    }

    // Undo the partial progress of the last failed attempt so the reported
    // position points at the value that could not be matched.
    this.pos = start;
    throw new ParseError(
      `Value does not match any union member. Tried ${schema.members.length} alternatives.`,
      start,
    );
  }

  /**
   * Parses a fixed-length, `;`-separated tuple, optionally wrapped in `[` `]`.
   * Each element may be prefixed with its declared name (`name:`). A bracketed
   * empty tuple `[]` yields an empty array regardless of the schema length.
   *
   * @param allowOmitBrackets When false, the opening `[` is mandatory.
   * @throws ParseError on a missing `[`, `;` or `]`, or when an element fails.
   */
  private parseTupleValue(
    schema: TupleSchema,
    allowOmitBrackets: boolean,
  ): unknown[] {
    let hasOpenBracket = false;

    if (this.peek() === "[") {
      this.consume();
      hasOpenBracket = true;
    } else if (!allowOmitBrackets) {
      throw new ParseError("Expected [", this.pos);
    }

    this.skipWhitespace();

    if (hasOpenBracket && this.peek() === "]") {
      this.consume();
      return [];
    }

    const result: unknown[] = [];
    const lastIndex = schema.elements.length - 1;
    for (let i = 0; i <= lastIndex; i++) {
      this.skipWhitespace();
      const elementSchema = schema.elements[i];

      // Optional name prefix (e.g., "current:"). consumeStr leaves the cursor
      // untouched when the prefix is absent, so no explicit backtracking is needed.
      if (elementSchema.name && this.consumeStr(`${elementSchema.name}:`)) {
        this.skipWhitespace();
      }

      result.push(this.parseValue(elementSchema.schema, false));
      this.skipWhitespace();

      if (i < lastIndex && !this.consumeStr(";")) {
        throw new ParseError("Expected ;", this.pos);
      }
    }

    this.skipWhitespace();

    if (hasOpenBracket && !this.consumeStr("]")) {
      throw new ParseError("Expected ]", this.pos);
    }

    return result;
  }

  /**
   * Parses a variable-length, `;`-separated array, optionally wrapped in
   * `[` `]`. Empty input yields an empty array.
   *
   * When the element type is itself a tuple or array, a single leading `[`
   * is taken to open the first element; only `[[` opens the outer array.
   *
   * @param allowOmitBrackets When false and the elements are scalar, the
   *   opening `[` is mandatory.
   * @throws ParseError on a missing `[` or `]`, or when an element fails.
   */
  private parseArrayValue(
    schema: ArraySchema,
    allowOmitBrackets: boolean,
  ): unknown[] {
    const elementIsTupleOrArray =
      schema.element.type === "tuple" || schema.element.type === "array";

    if (this.pos >= this.input.length || !this.input.trim()) {
      return [];
    }

    let hasOpenBracket = false;
    if (
      this.peek() === "[" &&
      (!elementIsTupleOrArray || this.input.charAt(this.pos + 1) === "[")
    ) {
      this.consume();
      hasOpenBracket = true;
    }

    if (!hasOpenBracket && !allowOmitBrackets && !elementIsTupleOrArray) {
      throw new ParseError("Expected [", this.pos);
    }

    this.skipWhitespace();

    if (hasOpenBracket && this.peek() === "]") {
      this.consume();
      return [];
    }

    const result: unknown[] = [];
    do {
      this.skipWhitespace();
      result.push(this.parseValue(schema.element, false));
      this.skipWhitespace();
    } while (this.consumeStr(";"));

    this.skipWhitespace();

    if (hasOpenBracket && !this.consumeStr("]")) {
      throw new ParseError("Expected ]", this.pos);
    }

    return result;
  }

  /**
   * Parses a reference as raw ID text; resolution to the referenced row is
   * not done here.
   *
   * - Optional reference with no remaining input: `null`.
   * - Array reference: `;`-separated IDs, brackets optional. `[]` and empty
   *   input both yield `[]`, matching how plain arrays treat an empty cell.
   * - Single reference: text up to the next `;`, `]` or `,`, trimmed.
   *
   * @throws ParseError when a bracketed array is missing its closing `]`.
   */
  private parseReferenceValue(
    schema: ReferenceSchema,
  ): string | string[] | null {
    if (schema.isOptional) {
      this.skipWhitespace();
      if (this.pos >= this.input.length) {
        return null;
      }
    }

    if (!schema.isArray) {
      // Parse single ID as string
      const start = this.pos;
      while (this.pos < this.input.length) {
        const char = this.peek();
        if (char === ";" || char === "]" || char === ",") {
          break;
        }
        this.pos++;
      }
      return this.input.slice(start, this.pos).trim();
    }

    // Parse array of IDs: [id1; id2; id3]
    let hasOpenBracket = false;
    if (this.peek() === "[") {
      this.consume();
      hasOpenBracket = true;
    }

    this.skipWhitespace();

    if (hasOpenBracket && this.peek() === "]") {
      this.consume();
      return [];
    }

    // An empty cell is an empty list, not a list holding one empty ID.
    if (!hasOpenBracket && this.pos >= this.input.length) {
      return [];
    }

    const ids: string[] = [];
    do {
      this.skipWhitespace();
      const start = this.pos;
      while (
        this.pos < this.input.length &&
        this.peek() !== ";" &&
        this.peek() !== "]"
      ) {
        this.pos++;
      }
      ids.push(this.input.slice(start, this.pos).trim());
      this.skipWhitespace();
    } while (this.consumeStr(";"));

    if (hasOpenBracket && !this.consumeStr("]")) {
      throw new ParseError("Expected ]", this.pos);
    }

    return ids;
  }

  /** Current cursor offset into the input. */
  getPosition(): number {
    return this.pos;
  }

  /** Total length of the input being parsed. */
  getInputLength(): number {
    return this.input.length;
  }
}

/**
 * Parses the text of one cell according to `schema`.
 *
 * The text is trimmed first. A top-level tuple or array may omit its
 * surrounding brackets. The whole text must be consumed.
 *
 * @param schema Schema describing the expected value.
 * @param valueString Raw cell text.
 * @returns The parsed value.
 * @throws ParseError when the text does not match the schema, or when input
 *   remains after a complete value has been read.
 */
export function parseValue(schema: Schema, valueString: string): unknown {
  const parser = new ValueParser(valueString.trim());
  const allowOmitBrackets = schema.type === "tuple" || schema.type === "array";
  const value = parser.parseValue(schema, allowOmitBrackets);

  if (parser.getPosition() < parser.getInputLength()) {
    throw new ParseError("Unexpected input after value", parser.getPosition());
  }

  return value;
}