From 79612be1c627c9ba1506a1b7f7b14f8d60ab7d83 Mon Sep 17 00:00:00 2001 From: gxwebsoft <170083662@qq.com> Date: Fri, 6 Feb 2026 17:46:53 +0800 Subject: [PATCH] =?UTF-8?q?feat(controller):=20=E6=96=B0=E5=A2=9E=E5=9F=BA?= =?UTF-8?q?=E4=BA=8E=E6=96=87=E6=9C=AC=E5=86=85=E5=AE=B9=E5=8C=B9=E9=85=8D?= =?UTF-8?q?=E4=BC=81=E4=B8=9A=E5=90=8D=E7=A7=B0=E7=9A=84=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 BatchImportSupport 中新增 refreshCompanyIdByCompanyNameContainedInText 方法 - 实现 AC 自动机算法进行多模式字符串匹配 - 支持从文本字段中提取包含的企业名称并回填 companyId - 添加 CompanyNameMatcher 内部类处理匹配逻辑 - 优化 CreditMediationController 使用新方法处理多方当事人字段 - 支持按租户分组避免跨租户误匹配 - 实现批量更新和事务处理机制 --- .../credit/controller/BatchImportSupport.java | 360 ++++++++++++++++++ .../controller/CreditMediationController.java | 8 +- 2 files changed, 365 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java b/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java index ca07a1d..21d3804 100644 --- a/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java +++ b/src/main/java/com/gxwebsoft/credit/controller/BatchImportSupport.java @@ -12,11 +12,15 @@ import org.springframework.transaction.support.TransactionTemplate; import org.springframework.util.CollectionUtils; import java.util.ArrayList; +import java.util.ArrayDeque; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Queue; +import java.util.Set; import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Consumer; @@ -336,6 +340,183 @@ public class BatchImportSupport { return new CompanyIdRefreshStats(true, updated, matched, notFound, ambiguous); } + /** + * 按“文本字段包含企业名称”的方式匹配 CreditCompany(name / matchName) 并回填 companyId。 + * + *

适用场景:某些表的“当事人/第三人”字段会包含多个角色+姓名/企业,例如: + * 申请执行人 - 张三 被执行人 - 某某有限公司。此时无法按整格等值匹配。

+ * + *

实现:按租户加载企业 name/matchName 构建多模式匹配(AC 自动机),在文本中查找出现的企业名。

+ * + *

列优先级:按 textGetters 的顺序尝试;若某列匹配到唯一企业则采用,否则继续下一列。

+ */ + @SafeVarargs + public final CompanyIdRefreshStats refreshCompanyIdByCompanyNameContainedInText(IService service, + CreditCompanyService creditCompanyService, + Integer currentTenantId, + Boolean onlyNull, + Integer limit, + SFunction idGetter, + BiConsumer idSetter, + SFunction companyIdGetter, + BiConsumer companyIdSetter, + SFunction hasDataGetter, + BiConsumer hasDataSetter, + SFunction tenantIdGetter, + Supplier patchFactory, + SFunction... textGetters) { + boolean onlyNullFlag = (onlyNull == null) || Boolean.TRUE.equals(onlyNull); + if (textGetters == null || textGetters.length == 0) { + return new CompanyIdRefreshStats(false, 0, 0, 0, 0); + } + + // 1) 读取待处理数据(仅取必要字段) + @SuppressWarnings({"rawtypes", "unchecked"}) + SFunction[] selectColumns = (SFunction[]) new SFunction[4 + textGetters.length]; + int colIdx = 0; + selectColumns[colIdx++] = idGetter; + selectColumns[colIdx++] = companyIdGetter; + selectColumns[colIdx++] = hasDataGetter; + selectColumns[colIdx++] = tenantIdGetter; + for (SFunction tg : textGetters) { + selectColumns[colIdx++] = tg; + } + + var query = service.lambdaQuery() + .select(selectColumns) + .eq(currentTenantId != null, tenantIdGetter, currentTenantId) + .and(w -> { + for (int i = 0; i < textGetters.length; i++) { + if (i == 0) { + w.isNotNull(textGetters[i]); + } else { + w.or().isNotNull(textGetters[i]); + } + } + }); + if (onlyNullFlag) { + query.and(w -> w.isNull(companyIdGetter).or().eq(companyIdGetter, 0)); + } + if (limit != null && limit > 0) { + query.last("limit " + Math.min(limit, 200000)); + } + List rows = query.list(); + if (CollectionUtils.isEmpty(rows)) { + return new CompanyIdRefreshStats(false, 0, 0, 0, 0); + } + + // 2) 按租户分组(避免跨租户误匹配) + Map> rowsByTenant = new LinkedHashMap<>(); + int missingTenant = 0; + for (T row : rows) { + if (row == null) { + continue; + } + Integer tenantId = currentTenantId != null ? currentTenantId : tenantIdGetter.apply(row); + if (tenantId == null) { + missingTenant++; + continue; + } + rowsByTenant.computeIfAbsent(tenantId, k -> new ArrayList<>()).add(row); + } + + int updated = 0; + int matched = 0; + int notFound = 0; + int ambiguous = 0; + final int batchSize = 500; + List updates = new ArrayList<>(batchSize); + + for (Map.Entry> entry : rowsByTenant.entrySet()) { + Integer tenantId = entry.getKey(); + List tenantRows = entry.getValue(); + if (tenantId == null || CollectionUtils.isEmpty(tenantRows)) { + continue; + } + + // 2.1) 构建当前租户的企业名匹配器 + List companies = creditCompanyService.lambdaQuery() + .select(CreditCompany::getId, CreditCompany::getName, CreditCompany::getMatchName, CreditCompany::getTenantId) + .eq(CreditCompany::getTenantId, tenantId) + .list(); + CompanyNameMatcher matcher = CompanyNameMatcher.build(companies); + + // 2.2) 匹配并回填 + for (T row : tenantRows) { + if (row == null) { + continue; + } + + Integer resolvedCompanyId = null; + boolean hasAmbiguous = false; + for (SFunction tg : textGetters) { + String text = tg.apply(row); + CompanyNameMatcher.MatchResult r = matcher.match(text); + if (r.ambiguous) { + hasAmbiguous = true; + continue; + } + if (r.companyId != null) { + resolvedCompanyId = r.companyId; + break; + } + } + + if (resolvedCompanyId == null) { + if (hasAmbiguous) { + ambiguous++; + } else { + notFound++; + } + continue; + } + matched++; + + Integer oldCompanyId = companyIdGetter.apply(row); + Boolean oldHasData = hasDataGetter.apply(row); + boolean needUpdate; + if (onlyNullFlag) { + needUpdate = (oldCompanyId == null) || oldCompanyId == 0; + } else { + needUpdate = oldCompanyId == null || !resolvedCompanyId.equals(oldCompanyId); + } + if (!Boolean.TRUE.equals(oldHasData)) { + needUpdate = true; + } + if (!needUpdate) { + continue; + } + + Integer id = idGetter.apply(row); + if (id == null) { + continue; + } + T patch = patchFactory.get(); + idSetter.accept(patch, id); + companyIdSetter.accept(patch, resolvedCompanyId); + hasDataSetter.accept(patch, Boolean.TRUE); + updates.add(patch); + if (updates.size() >= batchSize) { + List batch = new ArrayList<>(updates); + updates.clear(); + updated += runInNewTx(() -> service.updateBatchById(batch, batchSize) ? batch.size() : 0); + } + } + } + + if (currentTenantId == null && missingTenant > 0) { + notFound += missingTenant; + } + + if (!updates.isEmpty()) { + List batch = new ArrayList<>(updates); + updates.clear(); + updated += runInNewTx(() -> service.updateBatchById(batch, batchSize) ? batch.size() : 0); + } + + return new CompanyIdRefreshStats(true, updated, matched, notFound, ambiguous); + } + /** * 批量 upsert:优先按 code 匹配;code 为空时按 name 匹配。 */ @@ -852,4 +1033,183 @@ public class BatchImportSupport { // SFunction 是 getter method ref,直接调用即可 return idColumn.apply(entity); } + + /** + * Multi-pattern substring matcher for company names (CreditCompany.name / matchName). + * Uses an Aho–Corasick automaton to scan each text only once. + */ + private static final class CompanyNameMatcher { + private static final int MIN_PATTERN_LEN = 4; // Avoid false positives in free text (e.g. person names) + + private static final class Node { + final Map next = new HashMap<>(); + final List out = new ArrayList<>(); + int fail = 0; + } + + private final List nodes; + private final int[] patternCompanyId; // 0 means ambiguous + + private CompanyNameMatcher(List nodes, int[] patternCompanyId) { + this.nodes = nodes; + this.patternCompanyId = patternCompanyId; + } + + static CompanyNameMatcher build(List companies) { + List nodes = new ArrayList<>(); + nodes.add(new Node()); // root + + Map patternIndex = new HashMap<>(); + List companyIds = new ArrayList<>(); + + if (!CollectionUtils.isEmpty(companies)) { + for (CreditCompany c : companies) { + if (c == null || c.getId() == null) { + continue; + } + addPattern(nodes, patternIndex, companyIds, normalizeCompanyName(c.getName()), c.getId()); + addPattern(nodes, patternIndex, companyIds, normalizeCompanyName(c.getMatchName()), c.getId()); + } + } + + int[] patternCompanyId = new int[companyIds.size()]; + for (int i = 0; i < companyIds.size(); i++) { + patternCompanyId[i] = companyIds.get(i) != null ? companyIds.get(i) : 0; + } + + buildFailureLinks(nodes); + return new CompanyNameMatcher(nodes, patternCompanyId); + } + + private static void addPattern(List nodes, + Map patternIndex, + List companyIds, + String pattern, + Integer companyId) { + if (pattern == null || companyId == null) { + return; + } + if (pattern.length() < MIN_PATTERN_LEN) { + return; + } + + Integer existingIndex = patternIndex.get(pattern); + if (existingIndex != null) { + // Same pattern maps to multiple companies -> mark ambiguous. + Integer oldCompanyId = companyIds.get(existingIndex); + if (oldCompanyId != null && !oldCompanyId.equals(companyId)) { + companyIds.set(existingIndex, null); + } + return; + } + + int state = 0; + for (int i = 0; i < pattern.length(); i++) { + char ch = pattern.charAt(i); + Integer next = nodes.get(state).next.get(ch); + if (next == null) { + next = nodes.size(); + nodes.get(state).next.put(ch, next); + nodes.add(new Node()); + } + state = next; + } + int idx = companyIds.size(); + companyIds.add(companyId); + nodes.get(state).out.add(idx); + patternIndex.put(pattern, idx); + } + + private static void buildFailureLinks(List nodes) { + Queue q = new ArrayDeque<>(); + // Init depth-1 nodes + for (Map.Entry e : nodes.get(0).next.entrySet()) { + int s = e.getValue(); + nodes.get(s).fail = 0; + q.add(s); + } + while (!q.isEmpty()) { + int r = q.poll(); + for (Map.Entry e : nodes.get(r).next.entrySet()) { + char a = e.getKey(); + int s = e.getValue(); + q.add(s); + + int state = nodes.get(r).fail; + while (state != 0 && !nodes.get(state).next.containsKey(a)) { + state = nodes.get(state).fail; + } + Integer fs = nodes.get(state).next.get(a); + nodes.get(s).fail = (fs != null) ? fs : 0; + // Merge outputs from fail state + List out = nodes.get(nodes.get(s).fail).out; + if (!out.isEmpty()) { + nodes.get(s).out.addAll(out); + } + } + } + } + + static final class MatchResult { + final Integer companyId; // unique match + final boolean ambiguous; + + MatchResult(Integer companyId, boolean ambiguous) { + this.companyId = companyId; + this.ambiguous = ambiguous; + } + } + + MatchResult match(String text) { + String v = normalizeCompanyName(text); + if (v == null) { + return new MatchResult(null, false); + } + + int state = 0; + Set matchedIds = new HashSet<>(); + boolean ambiguous = false; + for (int i = 0; i < v.length(); i++) { + char ch = v.charAt(i); + while (state != 0 && !nodes.get(state).next.containsKey(ch)) { + state = nodes.get(state).fail; + } + Integer next = nodes.get(state).next.get(ch); + state = next != null ? next : 0; + + List out = nodes.get(state).out; + if (out.isEmpty()) { + continue; + } + for (Integer idx : out) { + if (idx == null || idx < 0 || idx >= patternCompanyId.length) { + continue; + } + int cid = patternCompanyId[idx]; + if (cid == 0) { + ambiguous = true; + } else { + matchedIds.add(cid); + if (matchedIds.size() > 1) { + ambiguous = true; + } + } + } + if (ambiguous) { + // Keep scanning to consume input, but we can early-exit for performance. + // For refresh use-case, ambiguous means we won't update this row. + // Still, continue is safe; break reduces CPU. + break; + } + } + + if (ambiguous) { + return new MatchResult(null, true); + } + if (matchedIds.size() == 1) { + return new MatchResult(matchedIds.iterator().next(), false); + } + return new MatchResult(null, false); + } + } } diff --git a/src/main/java/com/gxwebsoft/credit/controller/CreditMediationController.java b/src/main/java/com/gxwebsoft/credit/controller/CreditMediationController.java index 1f0e25a..9f45e32 100644 --- a/src/main/java/com/gxwebsoft/credit/controller/CreditMediationController.java +++ b/src/main/java/com/gxwebsoft/credit/controller/CreditMediationController.java @@ -159,7 +159,8 @@ public class CreditMediationController extends BaseController { User loginUser = getLoginUser(); Integer currentTenantId = loginUser != null ? loginUser.getTenantId() : null; - BatchImportSupport.CompanyIdRefreshStats stats = batchImportSupport.refreshCompanyIdByCompanyName( + // Special: otherPartiesThirdParty may contain multiple roles + names; match if any company name is contained in the text. + BatchImportSupport.CompanyIdRefreshStats stats = batchImportSupport.refreshCompanyIdByCompanyNameContainedInText( creditMediationService, creditCompanyService, currentTenantId, @@ -167,13 +168,14 @@ public class CreditMediationController extends BaseController { limit, CreditMediation::getId, CreditMediation::setId, - CreditMediation::getAppellee, CreditMediation::getCompanyId, CreditMediation::setCompanyId, CreditMediation::getHasData, CreditMediation::setHasData, CreditMediation::getTenantId, - CreditMediation::new + CreditMediation::new, + CreditMediation::getOtherPartiesThirdParty, + CreditMediation::getAppellee ); if (!stats.anyDataRead) {