gen_pojo_from_xls_json.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. #!/usr/bin/env node
  2. const fs = require('fs');
  3. const path = require('path');
  4. // 尝试加载xlsx模块,如果不存在则提供友好的错误信息
  5. // 注意:xlsx模块支持读取.xls格式文件
  6. let xlsx;
  7. try {
  8. xlsx = require('xlsx');
  9. } catch (e) {
  10. console.error('Error: xlsx module not found. Please install it with: npm install xlsx');
  11. console.error('Note: This script only supports .xls format files, not .xlsx');
  12. process.exit(1);
  13. }
  14. // Usage:
  15. // node gen_pojo_from_xls_json.js <sourceJsonDir> <sourceXlsDir> <packageName> <outputDir> [Tables...] [--ignore=a,b,c]
  16. // Tables can be names with or without .json/.xls, case-insensitive. Multiple can be space- or comma-separated.
  17. // Note: Only .xls format is supported for Excel files, not .xlsx
  18. const [,, jsonDir, xlsDir, packageName, outputDir, ...rest] = process.argv;
  19. if (!jsonDir || !xlsDir || !packageName || !outputDir) {
  20. console.log('Usage: node gen_pojo_from_xls_json.js <sourceJsonDir> <sourceXlsDir> <packageName> <outputDir> [Tables...] [--ignore=a,b,c]');
  21. process.exit(1);
  22. }
  23. function normalizeName(n) {
  24. return String(n).toLowerCase().replace(/\.[^.]+$/, '');
  25. }
  26. function parseArgs(args) {
  27. const whitelist = [];
  28. const blacklist = [];
  29. for (let i = 0; i < args.length; i++) {
  30. const a = String(args[i]);
  31. if (a.startsWith('--ignore=')) {
  32. const v = a.substring('--ignore='.length);
  33. v.split(',').map(s => s.trim()).filter(Boolean).forEach(x => blacklist.push(normalizeName(x)));
  34. continue;
  35. }
  36. if (a === '--ignore' && i + 1 < args.length) {
  37. const v = String(args[++i]);
  38. v.split(',').map(s => s.trim()).filter(Boolean).forEach(x => blacklist.push(normalizeName(x)));
  39. continue;
  40. }
  41. whitelist.push(normalizeName(a));
  42. }
  43. return { whitelist: new Set(whitelist.filter(Boolean)), blacklist: new Set(blacklist) };
  44. }
  45. const { whitelist: whiteSet, blacklist: blackSet } = parseArgs(rest);
  46. function listJsonFiles(dir) {
  47. return fs.readdirSync(dir)
  48. .filter(name => name.toLowerCase().endsWith('.json'))
  49. .map(name => path.join(dir, name));
  50. }
  51. function readJson(filePath) {
  52. let content = fs.readFileSync(filePath, 'utf8');
  53. if (content.charCodeAt(0) === 0xFEFF) content = content.slice(1);
  54. return JSON.parse(content);
  55. }
  56. function pickSample(root) {
  57. if (Array.isArray(root)) return root.length > 0 ? root[0] : null;
  58. if (root && typeof root === 'object') {
  59. const keys = Object.keys(root);
  60. return keys.length > 0 ? root[keys[0]] : null;
  61. }
  62. return null;
  63. }
  64. function analyzeAllSamples(root) {
  65. if (Array.isArray(root)) {
  66. return root; // 分析所有样本
  67. }
  68. if (root && typeof root === 'object') {
  69. const keys = Object.keys(root);
  70. return keys.map(k => root[k]); // 分析所有样本
  71. }
  72. return [];
  73. }
  74. function upperFirst(str) {
  75. if (!str) return str;
  76. return str.charAt(0).toUpperCase() + str.slice(1);
  77. }
  78. // Convert filename like M_item.json -> MItem
  79. function toClassName(fileName) {
  80. const base = fileName.replace(/\.[^.]+$/, '');
  81. return base.split(/[_\-\s]+/).filter(Boolean).map(upperFirst).join('');
  82. }
  83. function isInteger(num) {
  84. return typeof num === 'number' && Number.isInteger(num);
  85. }
  86. function isLong(num) {
  87. return typeof num === 'number' && Number.isInteger(num) && (num > 2147483647 || num < -2147483648);
  88. }
  89. function isFloat(num) {
  90. return typeof num === 'number' && !Number.isInteger(num);
  91. }
  92. // 检查字符串是否包含小数点(用于识别BigDecimal类型)
  93. function hasDecimalPoint(str) {
  94. return typeof str === 'string' && str.includes('.') && !isNaN(Number(str));
  95. }
  96. // 检查字符串是否为有效的数字格式
  97. function isValidNumberString(str) {
  98. if (typeof str !== 'string') return false;
  99. const trimmed = str.trim();
  100. if (trimmed === '') return false;
  101. return !isNaN(Number(trimmed)) && isFinite(Number(trimmed));
  102. }
  103. function inferJavaType(value) {
  104. if (value === null || value === undefined) return 'String';
  105. if (typeof value === 'number') {
  106. if (isLong(value)) return 'Long';
  107. if (isInteger(value)) return 'Integer';
  108. if (isFloat(value)) return 'BigDecimal';
  109. return 'BigDecimal';
  110. }
  111. if (typeof value === 'boolean') return 'Boolean';
  112. if (typeof value === 'string') return 'String';
  113. if (Array.isArray(value)) {
  114. if (value.length === 0) return 'List<Object>';
  115. return `List<${inferJavaType(value[0])}>`;
  116. }
  117. if (typeof value === 'object') return 'Object';
  118. return 'String';
  119. }
  120. function inferJavaTypeFromSamples(samples, fieldName) {
  121. if (samples.length === 0) return 'String';
  122. // 统计所有样本的类型
  123. const typeStats = {
  124. null: 0,
  125. undefined: 0,
  126. number: 0,
  127. boolean: 0,
  128. string: 0,
  129. array: 0,
  130. object: 0
  131. };
  132. const numberValues = [];
  133. const stringValues = [];
  134. const arrayValues = [];
  135. for (const sample of samples) {
  136. if (sample === null) {
  137. typeStats.null++;
  138. } else if (sample === undefined) {
  139. typeStats.undefined++;
  140. } else if (typeof sample === 'number') {
  141. typeStats.number++;
  142. numberValues.push(sample);
  143. } else if (typeof sample === 'boolean') {
  144. typeStats.boolean++;
  145. } else if (typeof sample === 'string') {
  146. typeStats.string++;
  147. stringValues.push(sample);
  148. } else if (Array.isArray(sample)) {
  149. typeStats.array++;
  150. arrayValues.push(sample);
  151. } else if (typeof sample === 'object') {
  152. typeStats.object++;
  153. }
  154. }
  155. // 找出主要类型
  156. const total = samples.length;
  157. const maxType = Object.keys(typeStats).reduce((a, b) => typeStats[a] > typeStats[b] ? a : b);
  158. const maxRatio = typeStats[maxType] / total;
  159. // 如果主要类型占比超过80%,使用该类型
  160. if (maxRatio >= 0.8) {
  161. switch (maxType) {
  162. case 'number':
  163. if (numberValues.length === 0) return 'Integer';
  164. // 检查是否需要Long类型
  165. const hasLong = numberValues.some(n => isLong(n));
  166. const hasFloat = numberValues.some(n => isFloat(n));
  167. if (hasLong) return 'Long';
  168. if (hasFloat) return 'BigDecimal';
  169. return 'Integer';
  170. case 'boolean':
  171. return 'Boolean';
  172. case 'string':
  173. // 检查字符串是否都是数字格式
  174. const allNumeric = stringValues.every(s => isValidNumberString(s));
  175. if (allNumeric && stringValues.length > 0) {
  176. const allNumbers = stringValues.map(s => Number(s));
  177. const hasLong = allNumbers.some(n => isLong(n));
  178. const hasFloat = allNumbers.some(n => isFloat(n));
  179. // 检查原始字符串是否包含小数点
  180. const hasDecimalPointInStrings = stringValues.some(s => hasDecimalPoint(s));
  181. if (hasLong) return 'Long';
  182. if (hasFloat || hasDecimalPointInStrings) return 'BigDecimal';
  183. return 'Integer';
  184. }
  185. return 'String';
  186. case 'array':
  187. if (arrayValues.length === 0) return 'List<Object>';
  188. // 分析数组元素类型
  189. const allArrayElements = [];
  190. for (const arr of arrayValues) {
  191. allArrayElements.push(...arr.slice(0, 3)); // 取前3个元素
  192. }
  193. if (allArrayElements.length === 0) return 'List<Object>';
  194. const elementType = inferJavaTypeFromSamples(allArrayElements, fieldName + '_element');
  195. return `List<${elementType}>`;
  196. case 'object':
  197. return 'Object';
  198. default:
  199. return 'String';
  200. }
  201. }
  202. // 混合类型,优先选择数字类型(可能是字符串形式的数字)
  203. if (typeStats.number > 0 && typeStats.string > 0) {
  204. // 检查字符串是否都是有效数字
  205. const allNumeric = stringValues.every(s => isValidNumberString(s));
  206. if (allNumeric) {
  207. const allNumbers = [...numberValues, ...stringValues.map(s => Number(s))];
  208. const hasLong = allNumbers.some(n => isLong(n));
  209. const hasFloat = allNumbers.some(n => isFloat(n));
  210. // 检查原始字符串是否包含小数点
  211. const hasDecimalPointInStrings = stringValues.some(s => hasDecimalPoint(s));
  212. if (hasLong) return 'Long';
  213. if (hasFloat || hasDecimalPointInStrings) return 'BigDecimal';
  214. return 'Integer';
  215. }
  216. }
  217. // 如果只有字符串类型,检查是否都是数字格式
  218. if (typeStats.string > 0 && typeStats.number === 0) {
  219. const allNumeric = stringValues.every(s => isValidNumberString(s));
  220. if (allNumeric && stringValues.length > 0) {
  221. const allNumbers = stringValues.map(s => Number(s));
  222. const hasLong = allNumbers.some(n => isLong(n));
  223. const hasFloat = allNumbers.some(n => isFloat(n));
  224. // 检查原始字符串是否包含小数点
  225. const hasDecimalPointInStrings = stringValues.some(s => hasDecimalPoint(s));
  226. if (hasLong) return 'Long';
  227. if (hasFloat || hasDecimalPointInStrings) return 'BigDecimal';
  228. return 'Integer';
  229. }
  230. }
  231. // 默认返回String
  232. return 'String';
  233. }
  234. const JAVA_KEYWORDS = new Set([
  235. 'abstract','assert','boolean','break','byte','case','catch','char','class','const','continue','default','do','double','else','enum','extends','final','finally','float','for','goto','if','implements','import','instanceof','int','interface','long','native','new','package','private','protected','public','return','short','static','strictfp','super','switch','synchronized','this','throw','throws','transient','try','void','volatile','while','null','true','false'
  236. ]);
  237. function sanitizeFieldName(name) {
  238. let safe = name.replace(/[^A-Za-z0-9_]/g, '_');
  239. if (!/^[A-Za-z_]/.test(safe)) safe = '_' + safe;
  240. if (JAVA_KEYWORDS.has(safe)) safe = safe + '_';
  241. return safe;
  242. }
  243. function readXlsComments(xlsFile) {
  244. try {
  245. const wb = xlsx.readFile(xlsFile);
  246. const sheetName = wb.SheetNames[0];
  247. const ws = wb.Sheets[sheetName];
  248. const rows = xlsx.utils.sheet_to_json(ws, { header: 1, raw: false });
  249. // Heuristics: within first 5 rows, pick field row as the one with max ASCII ratio and contains 'ID' or many word tokens
  250. const maxCheck = Math.min(5, rows.length);
  251. let fieldRowIdx = -1, commentRowIdx = -1;
  252. let bestScore = -1;
  253. for (let i = 0; i < maxCheck; i++) {
  254. const row = rows[i] || [];
  255. const joined = row.join(' ').trim();
  256. if (!joined) continue;
  257. const asciiCount = (joined.match(/[\x20-\x7E]/g) || []).length;
  258. const ratio = asciiCount / Math.max(1, joined.length);
  259. const wordCount = (joined.match(/[A-Za-z_][A-Za-z0-9_]*/g) || []).length;
  260. const score = ratio * 0.7 + Math.min(wordCount / Math.max(1, row.length), 1) * 0.3 + (joined.includes('ID') ? 0.2 : 0);
  261. if (score > bestScore) { bestScore = score; fieldRowIdx = i; }
  262. }
  263. // Comment row: prefer a different row with lower ASCII ratio (more likely Chinese)
  264. let bestCR = -1;
  265. for (let i = 0; i < maxCheck; i++) {
  266. if (i === fieldRowIdx) continue;
  267. const row = rows[i] || [];
  268. const joined = row.join(' ').trim();
  269. if (!joined) continue;
  270. const asciiCount = (joined.match(/[\x20-\x7E]/g) || []).length;
  271. const ratio = asciiCount / Math.max(1, joined.length);
  272. const score = 1 - ratio; // prefer non-ASCII
  273. if (score > bestCR) { bestCR = score; commentRowIdx = i; }
  274. }
  275. const fields = rows[fieldRowIdx] || [];
  276. const comments = rows[commentRowIdx] || [];
  277. const map = new Map();
  278. for (let i = 0; i < fields.length; i++) {
  279. const k = String(fields[i] || '').trim();
  280. if (!k) continue;
  281. const v = String(comments[i] || '').trim();
  282. if (k) map.set(k, v);
  283. }
  284. return { sheetName, comments: map };
  285. } catch (e) {
  286. return { sheetName: '', comments: new Map() };
  287. }
  288. }
  289. function findXlsForBase(xlsDir, baseName) {
  290. // Only try .xls format
  291. const xlsFile = path.join(xlsDir, baseName + '.xls');
  292. if (fs.existsSync(xlsFile)) return xlsFile;
  293. // Case-insensitive search fallback for .xls files only
  294. const files = fs.readdirSync(xlsDir).filter(n => /\.xls$/i.test(n));
  295. for (const n of files) {
  296. if (normalizeName(n) === normalizeName(baseName)) return path.join(xlsDir, n);
  297. }
  298. return null;
  299. }
  300. function generateJavaClass(pkg, className, fieldsOrder, fieldTypes, fieldComments, classComment, jsonFileName) {
  301. const lines = [];
  302. lines.push(`package ${pkg};`);
  303. lines.push('');
  304. lines.push('import lombok.Data;');
  305. // Only add imports that are actually used
  306. const usedTypes = new Set();
  307. for (const fname of fieldsOrder) {
  308. const type = fieldTypes.get(fname) || 'String';
  309. if (type === 'List<Integer>') {
  310. usedTypes.add('java.util.List');
  311. } else if (type === 'List<String>') {
  312. usedTypes.add('java.util.List');
  313. } else if (type === 'List<Object>') {
  314. usedTypes.add('java.util.List');
  315. } else if (type === 'List<BigDecimal>') {
  316. usedTypes.add('java.util.List');
  317. usedTypes.add('java.math.BigDecimal');
  318. } else if (type === 'List<Long>') {
  319. usedTypes.add('java.util.List');
  320. } else if (type === 'BigDecimal') {
  321. usedTypes.add('java.math.BigDecimal');
  322. }
  323. }
  324. // Add only the imports that are actually used
  325. usedTypes.forEach(importType => {
  326. lines.push(`import ${importType};`);
  327. });
  328. if (usedTypes.size > 0) {
  329. lines.push('');
  330. }
  331. if (classComment) {
  332. lines.push('/**');
  333. lines.push(` * 配置表:${classComment}`);
  334. lines.push(' */');
  335. }
  336. lines.push('@Data');
  337. lines.push(`public class ${className} {`);
  338. // Add path field using the actual JSON filename
  339. lines.push('\t/** JSON 配置文件路径 */');
  340. lines.push('\tpublic static final String path = "' + jsonFileName + '";');
  341. lines.push('');
  342. // Add fields
  343. for (const fname of fieldsOrder) {
  344. const type = fieldTypes.get(fname) || 'String';
  345. const cmt = fieldComments.get(fname);
  346. const safe = sanitizeFieldName(fname);
  347. if (cmt) {
  348. lines.push(`\t/** ${cmt} */`);
  349. }
  350. if (safe !== fname) {
  351. lines.push(`\t@JsonProperty("${fname}")`);
  352. }
  353. lines.push(`\tpublic ${type} ${safe};`);
  354. }
  355. lines.push('');
  356. lines.push('}');
  357. return lines.join('\n');
  358. }
  359. (function main() {
  360. if (!fs.existsSync(jsonDir)) {
  361. console.error('JSON dir not found:', jsonDir);
  362. process.exit(2);
  363. }
  364. if (!fs.existsSync(xlsDir)) {
  365. console.error('XLS dir not found:', xlsDir);
  366. process.exit(2);
  367. }
  368. const outPkgDir = path.join(outputDir, packageName.replace(/\./g, path.sep));
  369. fs.mkdirSync(outPkgDir, { recursive: true });
  370. let files = listJsonFiles(jsonDir);
  371. // Apply whitelist filter if provided
  372. if (whiteSet.size > 0) {
  373. files = files.filter(f => whiteSet.has(normalizeName(path.basename(f))));
  374. }
  375. // Apply blacklist filter if provided
  376. if (blackSet.size > 0) {
  377. files = files.filter(f => !blackSet.has(normalizeName(path.basename(f))));
  378. }
  379. if (files.length === 0) {
  380. console.log('No JSON files to process.');
  381. process.exit(0);
  382. }
  383. let generated = 0;
  384. for (const jf of files) {
  385. try {
  386. const base = path.basename(jf).replace(/\.[^.]+$/, '');
  387. const jsonRoot = readJson(jf);
  388. const sample = pickSample(jsonRoot);
  389. if (!sample || typeof sample !== 'object') { console.warn('Skip (no object sample):', jf); continue; }
  390. // 分析所有样本以获得更准确的类型推断
  391. const allSamples = analyzeAllSamples(jsonRoot);
  392. if (allSamples.length === 0) { console.warn('Skip (no samples):', jf); continue; }
  393. const xlsPath = findXlsForBase(xlsDir, base);
  394. const { sheetName, comments } = xlsPath ? readXlsComments(xlsPath) : { sheetName: '', comments: new Map() };
  395. const className = toClassName(base) + 'Pojo';
  396. const fieldsOrder = Object.keys(sample);
  397. const fieldTypes = new Map();
  398. // 使用多样本分析进行类型推断
  399. for (const fieldName of fieldsOrder) {
  400. const fieldSamples = allSamples.map(s => s[fieldName]).filter(v => v !== undefined);
  401. const inferredType = inferJavaTypeFromSamples(fieldSamples, fieldName);
  402. fieldTypes.set(fieldName, inferredType);
  403. // 调试输出
  404. if (fieldSamples.length > 0) {
  405. console.log(` ${fieldName}: ${inferredType} (samples: ${fieldSamples.length}, types: ${[...new Set(fieldSamples.map(v => typeof v))].join(', ')})`);
  406. }
  407. }
  408. const classComment = sheetName ? `${base}(${sheetName})` : `${base}`;
  409. const jsonFileName = path.basename(jf);
  410. const content = generateJavaClass(packageName, className, fieldsOrder, fieldTypes, comments, classComment, jsonFileName);
  411. const outPath = path.join(outPkgDir, `${className}.java`);
  412. fs.writeFileSync(outPath, content, 'utf8');
  413. generated++;
  414. console.log('Generated', outPath);
  415. } catch (e) {
  416. console.warn('Failed for', jf, e.message);
  417. }
  418. }
  419. console.log(`Done. Generated ${generated} classes.`);
  420. })();