From 6b401b82863e8a29af944bcbb260b4ff767e679c Mon Sep 17 00:00:00 2001 From: gxwebsoft <170083662@qq.com> Date: Fri, 6 Feb 2026 18:27:13 +0800 Subject: [PATCH] =?UTF-8?q?refactor(batch-import):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=85=AC=E5=8F=B8=E5=90=8D=E7=A7=B0=E5=8C=B9=E9=85=8D=E7=AE=97?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除未使用的 HashSet 和 Set 导入 - 添加 patternLen 字段用于存储模式长度信息 - 修改 CompanyNameMatcher 构造函数以接收 patternLen 参数 - 在构建匹配器时收集并存储每个模式的长度 - 替换原有的 matchedIds 集合匹配逻辑 - 实现基于位置和长度的最优匹配选择算法 - 优先选择更长、更具体的匹配结果 - 处理相同位置不同长度的匹配冲突情况 - 改进模糊匹配的判断逻辑和性能表现 --- .../credit/controller/BatchImportSupport.java | 62 +++++++++++++------ 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java b/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java index 21d3804..7464b36 100644 --- a/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java +++ b/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java @@ -14,13 +14,11 @@ import org.springframework.util.CollectionUtils; import java.util.ArrayList; import java.util.ArrayDeque; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Queue; -import java.util.Set; import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Consumer; @@ -1049,10 +1047,12 @@ public class BatchImportSupport { private final List nodes; private final int[] patternCompanyId; // 0 means ambiguous + private final int[] patternLen; - private CompanyNameMatcher(List nodes, int[] patternCompanyId) { + private CompanyNameMatcher(List nodes, int[] patternCompanyId, int[] patternLen) { this.nodes = nodes; this.patternCompanyId = patternCompanyId; + this.patternLen = patternLen; } static CompanyNameMatcher build(List companies) { @@ -1061,14 +1061,15 @@ public class BatchImportSupport { Map patternIndex = new HashMap<>(); List companyIds = new ArrayList<>(); + List patternLens = new ArrayList<>(); if (!CollectionUtils.isEmpty(companies)) { for (CreditCompany c : companies) { if (c == null || c.getId() == null) { continue; } - addPattern(nodes, patternIndex, companyIds, normalizeCompanyName(c.getName()), c.getId()); - addPattern(nodes, patternIndex, companyIds, normalizeCompanyName(c.getMatchName()), c.getId()); + addPattern(nodes, patternIndex, companyIds, patternLens, normalizeCompanyName(c.getName()), c.getId()); + addPattern(nodes, patternIndex, companyIds, patternLens, normalizeCompanyName(c.getMatchName()), c.getId()); } } @@ -1076,14 +1077,19 @@ public class BatchImportSupport { for (int i = 0; i < companyIds.size(); i++) { patternCompanyId[i] = companyIds.get(i) != null ? companyIds.get(i) : 0; } + int[] patternLen = new int[patternLens.size()]; + for (int i = 0; i < patternLens.size(); i++) { + patternLen[i] = patternLens.get(i) != null ? patternLens.get(i) : 0; + } buildFailureLinks(nodes); - return new CompanyNameMatcher(nodes, patternCompanyId); + return new CompanyNameMatcher(nodes, patternCompanyId, patternLen); } private static void addPattern(List nodes, Map patternIndex, List companyIds, + List patternLens, String pattern, Integer companyId) { if (pattern == null || companyId == null) { @@ -1116,6 +1122,7 @@ public class BatchImportSupport { } int idx = companyIds.size(); companyIds.add(companyId); + patternLens.add(pattern.length()); nodes.get(state).out.add(idx); patternIndex.put(pattern, idx); } @@ -1167,7 +1174,9 @@ public class BatchImportSupport { } int state = 0; - Set matchedIds = new HashSet<>(); + Integer bestCompanyId = null; + int bestStart = Integer.MAX_VALUE; + int bestLen = -1; boolean ambiguous = false; for (int i = 0; i < v.length(); i++) { char ch = v.charAt(i); @@ -1186,19 +1195,39 @@ public class BatchImportSupport { continue; } int cid = patternCompanyId[idx]; + // Pattern exists but maps to multiple companies -> ignore this hit, keep looking for a unique one. if (cid == 0) { - ambiguous = true; - } else { - matchedIds.add(cid); - if (matchedIds.size() > 1) { + continue; + } + int len = (idx < patternLen.length) ? patternLen[idx] : 0; + int start = len > 0 ? (i - len + 1) : i; + if (bestCompanyId == null) { + bestCompanyId = cid; + bestStart = start; + bestLen = len; + continue; + } + if (start < bestStart) { + bestCompanyId = cid; + bestStart = start; + bestLen = len; + continue; + } + if (start == bestStart) { + // Prefer the longer (more specific) match at the same position. + if (len > bestLen) { + bestCompanyId = cid; + bestLen = len; + continue; + } + // Same position + same length but different companyId -> truly ambiguous. + if (len == bestLen && !bestCompanyId.equals(cid)) { ambiguous = true; + break; } } } if (ambiguous) { - // Keep scanning to consume input, but we can early-exit for performance. - // For refresh use-case, ambiguous means we won't update this row. - // Still, continue is safe; break reduces CPU. break; } } @@ -1206,10 +1235,7 @@ public class BatchImportSupport { if (ambiguous) { return new MatchResult(null, true); } - if (matchedIds.size() == 1) { - return new MatchResult(matchedIds.iterator().next(), false); - } - return new MatchResult(null, false); + return new MatchResult(bestCompanyId, false); } } }