feat(controller): 新增基于文本内容匹配企业名称的功能

- 在 BatchImportSupport 中新增 refreshCompanyIdByCompanyNameContainedInText 方法
- 实现 AC 自动机算法进行多模式字符串匹配
- 支持从文本字段中提取包含的企业名称并回填 companyId
- 添加 CompanyNameMatcher 内部类处理匹配逻辑
- 优化 CreditMediationController 使用新方法处理多方当事人字段
- 支持按租户分组避免跨租户误匹配
- 实现批量更新和事务处理机制
This commit is contained in:
2026-02-06 17:46:53 +08:00
parent 1b2d09049a
commit 79612be1c6
2 changed files with 365 additions and 3 deletions

View File

@@ -12,11 +12,15 @@ import org.springframework.transaction.support.TransactionTemplate;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList;
import java.util.ArrayDeque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
@@ -336,6 +340,183 @@ public class BatchImportSupport {
return new CompanyIdRefreshStats(true, updated, matched, notFound, ambiguous);
}
/**
 * Back-fills {@code companyId} on rows whose free-text columns <em>contain</em> the name of a
 * known company ({@code CreditCompany.name} / {@code matchName}).
 *
 * <p>Use case: "party / third party" columns frequently hold several role + name pairs in one
 * cell (for example "applicant - Zhang San, respondent - XX Co., Ltd."), so whole-cell equality
 * matching cannot be used.</p>
 *
 * <p>Approach: for every tenant the company names are loaded and compiled into a multi-pattern
 * matcher (Aho-Corasick automaton), which is then run over each text value.</p>
 *
 * <p>Column priority: {@code textGetters} are tried in the given order. The first column that
 * resolves to exactly one company wins; a column that is ambiguous or has no hit falls through
 * to the next one.</p>
 *
 * @param service              service used to query the rows and to write the patches back
 * @param creditCompanyService service used to load the companies of each tenant
 * @param currentTenantId      when non-null, restricts the query to this tenant and is used as the
 *                             tenant of every row; when null, each row's own tenant id is used and
 *                             rows without one are counted as not found
 * @param onlyNull             {@code null} or {@code true}: only rows whose company id is null or 0
 *                             are read; {@code false}: all rows are read and a differing company id
 *                             is overwritten
 * @param limit                when positive, caps the number of rows read (at most 200000)
 * @param idGetter             reads the primary key of a row
 * @param idSetter             writes the primary key onto a patch object
 * @param companyIdGetter      reads the current company id of a row
 * @param companyIdSetter      writes the resolved company id onto a patch object
 * @param hasDataGetter        reads the has-data flag of a row
 * @param hasDataSetter        writes the has-data flag onto a patch object
 * @param tenantIdGetter       reads the tenant id of a row
 * @param patchFactory         creates an empty entity that carries only the fields to update
 * @param textGetters          text columns to scan, in priority order
 * @return statistics built as (anyDataRead, updated, matched, notFound, ambiguous); the first flag
 *         is {@code false} when no text column was supplied or no row was read
 */
@SafeVarargs
public final <T> CompanyIdRefreshStats refreshCompanyIdByCompanyNameContainedInText(IService<T> service,
        CreditCompanyService creditCompanyService,
        Integer currentTenantId,
        Boolean onlyNull,
        Integer limit,
        SFunction<T, Integer> idGetter,
        BiConsumer<T, Integer> idSetter,
        SFunction<T, Integer> companyIdGetter,
        BiConsumer<T, Integer> companyIdSetter,
        SFunction<T, Boolean> hasDataGetter,
        BiConsumer<T, Boolean> hasDataSetter,
        SFunction<T, Integer> tenantIdGetter,
        Supplier<T> patchFactory,
        SFunction<T, String>... textGetters) {
    // A missing flag behaves like TRUE: only rows without a company are touched.
    final boolean restrictToUnassigned = !Boolean.FALSE.equals(onlyNull);
    if (textGetters == null || textGetters.length == 0) {
        return new CompanyIdRefreshStats(false, 0, 0, 0, 0);
    }

    // 1) Load the candidate rows, selecting only the columns that are actually needed.
    @SuppressWarnings({"rawtypes", "unchecked"})
    SFunction<T, ?>[] projection = (SFunction<T, ?>[]) new SFunction[4 + textGetters.length];
    projection[0] = idGetter;
    projection[1] = companyIdGetter;
    projection[2] = hasDataGetter;
    projection[3] = tenantIdGetter;
    System.arraycopy(textGetters, 0, projection, 4, textGetters.length);

    var query = service.lambdaQuery()
            .select(projection)
            .eq(currentTenantId != null, tenantIdGetter, currentTenantId)
            .and(w -> {
                // At least one of the text columns must be non-null.
                w.isNotNull(textGetters[0]);
                for (int i = 1; i < textGetters.length; i++) {
                    w.or().isNotNull(textGetters[i]);
                }
            });
    if (restrictToUnassigned) {
        query.and(w -> w.isNull(companyIdGetter).or().eq(companyIdGetter, 0));
    }
    if (limit != null && limit > 0) {
        query.last("limit " + Math.min(limit, 200000));
    }

    List<T> rows = query.list();
    if (CollectionUtils.isEmpty(rows)) {
        return new CompanyIdRefreshStats(false, 0, 0, 0, 0);
    }

    // 2) Group the rows by tenant so that names of one tenant never match rows of another.
    Map<Integer, List<T>> rowsByTenant = new LinkedHashMap<>();
    int rowsWithoutTenant = 0;
    for (T row : rows) {
        if (row == null) {
            continue;
        }
        Integer rowTenantId = (currentTenantId == null) ? tenantIdGetter.apply(row) : currentTenantId;
        if (rowTenantId != null) {
            rowsByTenant.computeIfAbsent(rowTenantId, k -> new ArrayList<>()).add(row);
        } else {
            rowsWithoutTenant++;
        }
    }

    int updated = 0;
    int matched = 0;
    int notFound = 0;
    int ambiguous = 0;

    final int flushThreshold = 500;
    List<T> pending = new ArrayList<>(flushThreshold);

    for (Map.Entry<Integer, List<T>> group : rowsByTenant.entrySet()) {
        Integer groupTenantId = group.getKey();
        List<T> groupRows = group.getValue();
        if (groupTenantId == null || CollectionUtils.isEmpty(groupRows)) {
            continue;
        }

        // 2.1) Compile the company names of this tenant into a matcher.
        List<CreditCompany> tenantCompanies = creditCompanyService.lambdaQuery()
                .select(CreditCompany::getId, CreditCompany::getName, CreditCompany::getMatchName, CreditCompany::getTenantId)
                .eq(CreditCompany::getTenantId, groupTenantId)
                .list();
        CompanyNameMatcher nameMatcher = CompanyNameMatcher.build(tenantCompanies);

        // 2.2) Resolve every row and queue a patch where something has to change.
        for (T row : groupRows) {
            if (row == null) {
                continue;
            }

            Integer resolvedCompanyId = null;
            boolean sawAmbiguousColumn = false;
            for (SFunction<T, String> textGetter : textGetters) {
                CompanyNameMatcher.MatchResult result = nameMatcher.match(textGetter.apply(row));
                if (result.ambiguous) {
                    // Remember it, but give lower-priority columns a chance.
                    sawAmbiguousColumn = true;
                } else if (result.companyId != null) {
                    resolvedCompanyId = result.companyId;
                    break;
                }
            }

            if (resolvedCompanyId == null) {
                if (sawAmbiguousColumn) {
                    ambiguous++;
                } else {
                    notFound++;
                }
                continue;
            }
            matched++;

            Integer previousCompanyId = companyIdGetter.apply(row);
            Boolean previousHasData = hasDataGetter.apply(row);

            // Integer.equals(null) is false, so the second branch also covers a null previous id.
            boolean companyIdChanges = restrictToUnassigned
                    ? (previousCompanyId == null || previousCompanyId == 0)
                    : !resolvedCompanyId.equals(previousCompanyId);
            // A row whose has-data flag is not yet TRUE is always written.
            boolean needsWrite = companyIdChanges || !Boolean.TRUE.equals(previousHasData);
            if (!needsWrite) {
                continue;
            }

            Integer rowId = idGetter.apply(row);
            if (rowId == null) {
                continue;
            }

            T patch = patchFactory.get();
            idSetter.accept(patch, rowId);
            companyIdSetter.accept(patch, resolvedCompanyId);
            hasDataSetter.accept(patch, Boolean.TRUE);
            pending.add(patch);

            if (pending.size() >= flushThreshold) {
                // Hand a private copy to the update call so the buffer can be reused right away.
                List<T> chunk = new ArrayList<>(pending);
                pending.clear();
                updated += runInNewTx(() -> service.updateBatchById(chunk, flushThreshold) ? chunk.size() : 0);
            }
        }
    }

    // Rows without a tenant only occur when no current tenant was supplied; they count as not found.
    notFound += rowsWithoutTenant;

    // Write whatever is left in the buffer.
    if (!pending.isEmpty()) {
        List<T> chunk = new ArrayList<>(pending);
        pending.clear();
        updated += runInNewTx(() -> service.updateBatchById(chunk, flushThreshold) ? chunk.size() : 0);
    }
    return new CompanyIdRefreshStats(true, updated, matched, notFound, ambiguous);
}
/**
* 批量 upsert:优先按 code 匹配,code 为空时按 name 匹配。
*/
@@ -852,4 +1033,183 @@ public class BatchImportSupport {
// SFunction 是 getter method ref,直接调用即可
return idColumn.apply(entity);
}
/**
 * Multi-pattern substring matcher for company names (CreditCompany.name / matchName).
 *
 * <p>All names are compiled into one Aho-Corasick automaton so that every text is scanned in a
 * single pass, regardless of how many companies exist.</p>
 *
 * <p>A text is reported as ambiguous when it contains a name shared by several companies, or
 * names of two different companies. The latter includes the case where one company name is
 * contained in another one (both patterns hit).</p>
 */
private static final class CompanyNameMatcher {
    /** Normalized names shorter than this are ignored to avoid false positives in free text (e.g. person names). */
    private static final int MIN_PATTERN_LEN = 4;

    /** One automaton state: outgoing edges, indexes of the patterns recognized here, failure link. */
    private static final class Node {
        final Map<Character, Integer> next = new HashMap<>();
        final List<Integer> out = new ArrayList<>();
        int fail = 0;
    }

    /** Automaton states; index 0 is the root. */
    private final List<Node> nodes;
    /** Company id per pattern index; 0 marks a pattern that belongs to more than one company. */
    private final int[] patternCompanyId;

    private CompanyNameMatcher(List<Node> nodes, int[] patternCompanyId) {
        this.nodes = nodes;
        this.patternCompanyId = patternCompanyId;
    }

    /**
     * Compiles the normalized name and matchName of every company into a matcher.
     *
     * @param companies companies to index; may be null or empty, null entries and entries
     *                  without an id are skipped
     * @return a matcher over the given companies (it matches nothing when no pattern was added)
     */
    static CompanyNameMatcher build(List<CreditCompany> companies) {
        List<Node> trie = new ArrayList<>();
        trie.add(new Node()); // root
        Map<String, Integer> indexByPattern = new HashMap<>();
        List<Integer> ownerByIndex = new ArrayList<>();

        if (!CollectionUtils.isEmpty(companies)) {
            for (CreditCompany company : companies) {
                if (company == null || company.getId() == null) {
                    continue;
                }
                addPattern(trie, indexByPattern, ownerByIndex, normalizeCompanyName(company.getName()), company.getId());
                addPattern(trie, indexByPattern, ownerByIndex, normalizeCompanyName(company.getMatchName()), company.getId());
            }
        }

        // A null owner means "shared by several companies"; it is stored as 0.
        int[] owners = new int[ownerByIndex.size()];
        for (int i = 0; i < owners.length; i++) {
            Integer owner = ownerByIndex.get(i);
            owners[i] = (owner == null) ? 0 : owner;
        }

        buildFailureLinks(trie);
        return new CompanyNameMatcher(trie, owners);
    }

    /**
     * Inserts one pattern into the trie, or flags an already known pattern as ambiguous when it
     * is now claimed by a different company. Null and too short patterns are ignored.
     */
    private static void addPattern(List<Node> nodes,
                                   Map<String, Integer> patternIndex,
                                   List<Integer> companyIds,
                                   String pattern,
                                   Integer companyId) {
        if (pattern == null || companyId == null || pattern.length() < MIN_PATTERN_LEN) {
            return;
        }

        Integer knownIndex = patternIndex.get(pattern);
        if (knownIndex != null) {
            // Same pattern maps to multiple companies -> mark ambiguous.
            Integer owner = companyIds.get(knownIndex);
            if (owner != null && !owner.equals(companyId)) {
                companyIds.set(knownIndex, null);
            }
            return;
        }

        // Walk the trie along the pattern, creating missing states on the way.
        int state = 0;
        for (int pos = 0; pos < pattern.length(); pos++) {
            Map<Character, Integer> edges = nodes.get(state).next;
            char ch = pattern.charAt(pos);
            Integer target = edges.get(ch);
            if (target == null) {
                target = nodes.size();
                nodes.add(new Node());
                edges.put(ch, target);
            }
            state = target;
        }

        int newIndex = companyIds.size();
        companyIds.add(companyId);
        nodes.get(state).out.add(newIndex);
        patternIndex.put(pattern, newIndex);
    }

    /**
     * Computes the failure link of every state breadth-first and copies the outputs of the
     * failure target into each state, so a state lists every pattern that ends at it.
     */
    private static void buildFailureLinks(List<Node> nodes) {
        Queue<Integer> pendingStates = new ArrayDeque<>();

        // Children of the root always fall back to the root.
        for (int child : nodes.get(0).next.values()) {
            nodes.get(child).fail = 0;
            pendingStates.add(child);
        }

        while (!pendingStates.isEmpty()) {
            int parent = pendingStates.poll();
            for (Map.Entry<Character, Integer> edge : nodes.get(parent).next.entrySet()) {
                char symbol = edge.getKey();
                int child = edge.getValue();
                pendingStates.add(child);

                // Follow the parent's failure chain until a state with an edge for this symbol is found.
                int fallback = nodes.get(parent).fail;
                while (fallback != 0 && !nodes.get(fallback).next.containsKey(symbol)) {
                    fallback = nodes.get(fallback).fail;
                }
                Integer target = nodes.get(fallback).next.get(symbol);

                Node childNode = nodes.get(child);
                childNode.fail = (target == null) ? 0 : target;
                // The failure target is shallower and therefore already complete.
                childNode.out.addAll(nodes.get(childNode.fail).out);
            }
        }
    }

    /** Outcome of scanning one text. */
    static final class MatchResult {
        /** Id of the single company found in the text, or null when none or several were found. */
        final Integer companyId;
        /** True when the text cannot be attributed to exactly one company. */
        final boolean ambiguous;

        MatchResult(Integer companyId, boolean ambiguous) {
            this.companyId = companyId;
            this.ambiguous = ambiguous;
        }
    }

    /**
     * Scans the normalized text for company names.
     *
     * @param text raw text; it is normalized with {@code normalizeCompanyName} first
     * @return (id, false) when exactly one company occurs, (null, true) when the text is
     *         ambiguous, (null, false) when nothing matched or the normalized text is null
     */
    MatchResult match(String text) {
        String haystack = normalizeCompanyName(text);
        if (haystack == null) {
            return new MatchResult(null, false);
        }

        Set<Integer> foundCompanyIds = new HashSet<>();
        int state = 0;
        for (int pos = 0; pos < haystack.length(); pos++) {
            char ch = haystack.charAt(pos);
            while (state != 0 && !nodes.get(state).next.containsKey(ch)) {
                state = nodes.get(state).fail;
            }
            Integer target = nodes.get(state).next.get(ch);
            state = (target == null) ? 0 : target;

            for (Integer patternIdx : nodes.get(state).out) {
                if (patternIdx == null || patternIdx < 0 || patternIdx >= patternCompanyId.length) {
                    continue;
                }
                int owner = patternCompanyId[patternIdx];
                if (owner == 0) {
                    // Name shared by several companies: nothing later in the text can change the verdict.
                    return new MatchResult(null, true);
                }
                foundCompanyIds.add(owner);
                if (foundCompanyIds.size() > 1) {
                    // Two different companies in one text: stop scanning, the row will not be updated.
                    return new MatchResult(null, true);
                }
            }
        }

        Integer uniqueId = foundCompanyIds.isEmpty() ? null : foundCompanyIds.iterator().next();
        return new MatchResult(uniqueId, false);
    }
}
}

View File

@@ -159,7 +159,8 @@ public class CreditMediationController extends BaseController {
User loginUser = getLoginUser();
Integer currentTenantId = loginUser != null ? loginUser.getTenantId() : null;
BatchImportSupport.CompanyIdRefreshStats stats = batchImportSupport.refreshCompanyIdByCompanyName(
// Special: otherPartiesThirdParty may contain multiple roles + names; match if any company name is contained in the text.
BatchImportSupport.CompanyIdRefreshStats stats = batchImportSupport.refreshCompanyIdByCompanyNameContainedInText(
creditMediationService,
creditCompanyService,
currentTenantId,
@@ -167,13 +168,14 @@ public class CreditMediationController extends BaseController {
limit,
CreditMediation::getId,
CreditMediation::setId,
CreditMediation::getAppellee,
CreditMediation::getCompanyId,
CreditMediation::setCompanyId,
CreditMediation::getHasData,
CreditMediation::setHasData,
CreditMediation::getTenantId,
CreditMediation::new
CreditMediation::new,
CreditMediation::getOtherPartiesThirdParty,
CreditMediation::getAppellee
);
if (!stats.anyDataRead) {