feat(controller): 新增基于文本内容匹配企业名称的功能
- 在 BatchImportSupport 中新增 refreshCompanyIdByCompanyNameContainedInText 方法 - 实现 AC 自动机算法进行多模式字符串匹配 - 支持从文本字段中提取包含的企业名称并回填 companyId - 添加 CompanyNameMatcher 内部类处理匹配逻辑 - 优化 CreditMediationController 使用新方法处理多方当事人字段 - 支持按租户分组避免跨租户误匹配 - 实现批量更新和事务处理机制
This commit is contained in:
@@ -12,11 +12,15 @@ import org.springframework.transaction.support.TransactionTemplate;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Consumer;
|
||||
@@ -336,6 +340,183 @@ public class BatchImportSupport {
|
||||
return new CompanyIdRefreshStats(true, updated, matched, notFound, ambiguous);
|
||||
}
|
||||
|
||||
/**
|
||||
* 按“文本字段包含企业名称”的方式匹配 CreditCompany(name / matchName) 并回填 companyId。
|
||||
*
|
||||
* <p>适用场景:某些表的“当事人/第三人”字段会包含多个角色+姓名/企业,例如:
|
||||
* 申请执行人 - 张三 被执行人 - 某某有限公司。此时无法按整格等值匹配。</p>
|
||||
*
|
||||
* <p>实现:按租户加载企业 name/matchName 构建多模式匹配(AC 自动机),在文本中查找出现的企业名。</p>
|
||||
*
|
||||
* <p>列优先级:按 textGetters 的顺序尝试;若某列匹配到唯一企业则采用,否则继续下一列。</p>
|
||||
*/
|
||||
@SafeVarargs
|
||||
public final <T> CompanyIdRefreshStats refreshCompanyIdByCompanyNameContainedInText(IService<T> service,
|
||||
CreditCompanyService creditCompanyService,
|
||||
Integer currentTenantId,
|
||||
Boolean onlyNull,
|
||||
Integer limit,
|
||||
SFunction<T, Integer> idGetter,
|
||||
BiConsumer<T, Integer> idSetter,
|
||||
SFunction<T, Integer> companyIdGetter,
|
||||
BiConsumer<T, Integer> companyIdSetter,
|
||||
SFunction<T, Boolean> hasDataGetter,
|
||||
BiConsumer<T, Boolean> hasDataSetter,
|
||||
SFunction<T, Integer> tenantIdGetter,
|
||||
Supplier<T> patchFactory,
|
||||
SFunction<T, String>... textGetters) {
|
||||
boolean onlyNullFlag = (onlyNull == null) || Boolean.TRUE.equals(onlyNull);
|
||||
if (textGetters == null || textGetters.length == 0) {
|
||||
return new CompanyIdRefreshStats(false, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// 1) 读取待处理数据(仅取必要字段)
|
||||
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||
SFunction<T, ?>[] selectColumns = (SFunction<T, ?>[]) new SFunction[4 + textGetters.length];
|
||||
int colIdx = 0;
|
||||
selectColumns[colIdx++] = idGetter;
|
||||
selectColumns[colIdx++] = companyIdGetter;
|
||||
selectColumns[colIdx++] = hasDataGetter;
|
||||
selectColumns[colIdx++] = tenantIdGetter;
|
||||
for (SFunction<T, String> tg : textGetters) {
|
||||
selectColumns[colIdx++] = tg;
|
||||
}
|
||||
|
||||
var query = service.lambdaQuery()
|
||||
.select(selectColumns)
|
||||
.eq(currentTenantId != null, tenantIdGetter, currentTenantId)
|
||||
.and(w -> {
|
||||
for (int i = 0; i < textGetters.length; i++) {
|
||||
if (i == 0) {
|
||||
w.isNotNull(textGetters[i]);
|
||||
} else {
|
||||
w.or().isNotNull(textGetters[i]);
|
||||
}
|
||||
}
|
||||
});
|
||||
if (onlyNullFlag) {
|
||||
query.and(w -> w.isNull(companyIdGetter).or().eq(companyIdGetter, 0));
|
||||
}
|
||||
if (limit != null && limit > 0) {
|
||||
query.last("limit " + Math.min(limit, 200000));
|
||||
}
|
||||
List<T> rows = query.list();
|
||||
if (CollectionUtils.isEmpty(rows)) {
|
||||
return new CompanyIdRefreshStats(false, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
// 2) 按租户分组(避免跨租户误匹配)
|
||||
Map<Integer, List<T>> rowsByTenant = new LinkedHashMap<>();
|
||||
int missingTenant = 0;
|
||||
for (T row : rows) {
|
||||
if (row == null) {
|
||||
continue;
|
||||
}
|
||||
Integer tenantId = currentTenantId != null ? currentTenantId : tenantIdGetter.apply(row);
|
||||
if (tenantId == null) {
|
||||
missingTenant++;
|
||||
continue;
|
||||
}
|
||||
rowsByTenant.computeIfAbsent(tenantId, k -> new ArrayList<>()).add(row);
|
||||
}
|
||||
|
||||
int updated = 0;
|
||||
int matched = 0;
|
||||
int notFound = 0;
|
||||
int ambiguous = 0;
|
||||
final int batchSize = 500;
|
||||
List<T> updates = new ArrayList<>(batchSize);
|
||||
|
||||
for (Map.Entry<Integer, List<T>> entry : rowsByTenant.entrySet()) {
|
||||
Integer tenantId = entry.getKey();
|
||||
List<T> tenantRows = entry.getValue();
|
||||
if (tenantId == null || CollectionUtils.isEmpty(tenantRows)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 2.1) 构建当前租户的企业名匹配器
|
||||
List<CreditCompany> companies = creditCompanyService.lambdaQuery()
|
||||
.select(CreditCompany::getId, CreditCompany::getName, CreditCompany::getMatchName, CreditCompany::getTenantId)
|
||||
.eq(CreditCompany::getTenantId, tenantId)
|
||||
.list();
|
||||
CompanyNameMatcher matcher = CompanyNameMatcher.build(companies);
|
||||
|
||||
// 2.2) 匹配并回填
|
||||
for (T row : tenantRows) {
|
||||
if (row == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Integer resolvedCompanyId = null;
|
||||
boolean hasAmbiguous = false;
|
||||
for (SFunction<T, String> tg : textGetters) {
|
||||
String text = tg.apply(row);
|
||||
CompanyNameMatcher.MatchResult r = matcher.match(text);
|
||||
if (r.ambiguous) {
|
||||
hasAmbiguous = true;
|
||||
continue;
|
||||
}
|
||||
if (r.companyId != null) {
|
||||
resolvedCompanyId = r.companyId;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (resolvedCompanyId == null) {
|
||||
if (hasAmbiguous) {
|
||||
ambiguous++;
|
||||
} else {
|
||||
notFound++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
matched++;
|
||||
|
||||
Integer oldCompanyId = companyIdGetter.apply(row);
|
||||
Boolean oldHasData = hasDataGetter.apply(row);
|
||||
boolean needUpdate;
|
||||
if (onlyNullFlag) {
|
||||
needUpdate = (oldCompanyId == null) || oldCompanyId == 0;
|
||||
} else {
|
||||
needUpdate = oldCompanyId == null || !resolvedCompanyId.equals(oldCompanyId);
|
||||
}
|
||||
if (!Boolean.TRUE.equals(oldHasData)) {
|
||||
needUpdate = true;
|
||||
}
|
||||
if (!needUpdate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Integer id = idGetter.apply(row);
|
||||
if (id == null) {
|
||||
continue;
|
||||
}
|
||||
T patch = patchFactory.get();
|
||||
idSetter.accept(patch, id);
|
||||
companyIdSetter.accept(patch, resolvedCompanyId);
|
||||
hasDataSetter.accept(patch, Boolean.TRUE);
|
||||
updates.add(patch);
|
||||
if (updates.size() >= batchSize) {
|
||||
List<T> batch = new ArrayList<>(updates);
|
||||
updates.clear();
|
||||
updated += runInNewTx(() -> service.updateBatchById(batch, batchSize) ? batch.size() : 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (currentTenantId == null && missingTenant > 0) {
|
||||
notFound += missingTenant;
|
||||
}
|
||||
|
||||
if (!updates.isEmpty()) {
|
||||
List<T> batch = new ArrayList<>(updates);
|
||||
updates.clear();
|
||||
updated += runInNewTx(() -> service.updateBatchById(batch, batchSize) ? batch.size() : 0);
|
||||
}
|
||||
|
||||
return new CompanyIdRefreshStats(true, updated, matched, notFound, ambiguous);
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量 upsert:优先按 code 匹配;code 为空时按 name 匹配。
|
||||
*/
|
||||
@@ -852,4 +1033,183 @@ public class BatchImportSupport {
|
||||
// SFunction 是 getter method ref,直接调用即可
|
||||
return idColumn.apply(entity);
|
||||
}
|
||||
|
||||
/**
|
||||
* Multi-pattern substring matcher for company names (CreditCompany.name / matchName).
|
||||
* Uses an Aho–Corasick automaton to scan each text only once.
|
||||
*/
|
||||
private static final class CompanyNameMatcher {
|
||||
private static final int MIN_PATTERN_LEN = 4; // Avoid false positives in free text (e.g. person names)
|
||||
|
||||
private static final class Node {
|
||||
final Map<Character, Integer> next = new HashMap<>();
|
||||
final List<Integer> out = new ArrayList<>();
|
||||
int fail = 0;
|
||||
}
|
||||
|
||||
private final List<Node> nodes;
|
||||
private final int[] patternCompanyId; // 0 means ambiguous
|
||||
|
||||
private CompanyNameMatcher(List<Node> nodes, int[] patternCompanyId) {
|
||||
this.nodes = nodes;
|
||||
this.patternCompanyId = patternCompanyId;
|
||||
}
|
||||
|
||||
static CompanyNameMatcher build(List<CreditCompany> companies) {
|
||||
List<Node> nodes = new ArrayList<>();
|
||||
nodes.add(new Node()); // root
|
||||
|
||||
Map<String, Integer> patternIndex = new HashMap<>();
|
||||
List<Integer> companyIds = new ArrayList<>();
|
||||
|
||||
if (!CollectionUtils.isEmpty(companies)) {
|
||||
for (CreditCompany c : companies) {
|
||||
if (c == null || c.getId() == null) {
|
||||
continue;
|
||||
}
|
||||
addPattern(nodes, patternIndex, companyIds, normalizeCompanyName(c.getName()), c.getId());
|
||||
addPattern(nodes, patternIndex, companyIds, normalizeCompanyName(c.getMatchName()), c.getId());
|
||||
}
|
||||
}
|
||||
|
||||
int[] patternCompanyId = new int[companyIds.size()];
|
||||
for (int i = 0; i < companyIds.size(); i++) {
|
||||
patternCompanyId[i] = companyIds.get(i) != null ? companyIds.get(i) : 0;
|
||||
}
|
||||
|
||||
buildFailureLinks(nodes);
|
||||
return new CompanyNameMatcher(nodes, patternCompanyId);
|
||||
}
|
||||
|
||||
private static void addPattern(List<Node> nodes,
|
||||
Map<String, Integer> patternIndex,
|
||||
List<Integer> companyIds,
|
||||
String pattern,
|
||||
Integer companyId) {
|
||||
if (pattern == null || companyId == null) {
|
||||
return;
|
||||
}
|
||||
if (pattern.length() < MIN_PATTERN_LEN) {
|
||||
return;
|
||||
}
|
||||
|
||||
Integer existingIndex = patternIndex.get(pattern);
|
||||
if (existingIndex != null) {
|
||||
// Same pattern maps to multiple companies -> mark ambiguous.
|
||||
Integer oldCompanyId = companyIds.get(existingIndex);
|
||||
if (oldCompanyId != null && !oldCompanyId.equals(companyId)) {
|
||||
companyIds.set(existingIndex, null);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int state = 0;
|
||||
for (int i = 0; i < pattern.length(); i++) {
|
||||
char ch = pattern.charAt(i);
|
||||
Integer next = nodes.get(state).next.get(ch);
|
||||
if (next == null) {
|
||||
next = nodes.size();
|
||||
nodes.get(state).next.put(ch, next);
|
||||
nodes.add(new Node());
|
||||
}
|
||||
state = next;
|
||||
}
|
||||
int idx = companyIds.size();
|
||||
companyIds.add(companyId);
|
||||
nodes.get(state).out.add(idx);
|
||||
patternIndex.put(pattern, idx);
|
||||
}
|
||||
|
||||
private static void buildFailureLinks(List<Node> nodes) {
|
||||
Queue<Integer> q = new ArrayDeque<>();
|
||||
// Init depth-1 nodes
|
||||
for (Map.Entry<Character, Integer> e : nodes.get(0).next.entrySet()) {
|
||||
int s = e.getValue();
|
||||
nodes.get(s).fail = 0;
|
||||
q.add(s);
|
||||
}
|
||||
while (!q.isEmpty()) {
|
||||
int r = q.poll();
|
||||
for (Map.Entry<Character, Integer> e : nodes.get(r).next.entrySet()) {
|
||||
char a = e.getKey();
|
||||
int s = e.getValue();
|
||||
q.add(s);
|
||||
|
||||
int state = nodes.get(r).fail;
|
||||
while (state != 0 && !nodes.get(state).next.containsKey(a)) {
|
||||
state = nodes.get(state).fail;
|
||||
}
|
||||
Integer fs = nodes.get(state).next.get(a);
|
||||
nodes.get(s).fail = (fs != null) ? fs : 0;
|
||||
// Merge outputs from fail state
|
||||
List<Integer> out = nodes.get(nodes.get(s).fail).out;
|
||||
if (!out.isEmpty()) {
|
||||
nodes.get(s).out.addAll(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static final class MatchResult {
|
||||
final Integer companyId; // unique match
|
||||
final boolean ambiguous;
|
||||
|
||||
MatchResult(Integer companyId, boolean ambiguous) {
|
||||
this.companyId = companyId;
|
||||
this.ambiguous = ambiguous;
|
||||
}
|
||||
}
|
||||
|
||||
MatchResult match(String text) {
|
||||
String v = normalizeCompanyName(text);
|
||||
if (v == null) {
|
||||
return new MatchResult(null, false);
|
||||
}
|
||||
|
||||
int state = 0;
|
||||
Set<Integer> matchedIds = new HashSet<>();
|
||||
boolean ambiguous = false;
|
||||
for (int i = 0; i < v.length(); i++) {
|
||||
char ch = v.charAt(i);
|
||||
while (state != 0 && !nodes.get(state).next.containsKey(ch)) {
|
||||
state = nodes.get(state).fail;
|
||||
}
|
||||
Integer next = nodes.get(state).next.get(ch);
|
||||
state = next != null ? next : 0;
|
||||
|
||||
List<Integer> out = nodes.get(state).out;
|
||||
if (out.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
for (Integer idx : out) {
|
||||
if (idx == null || idx < 0 || idx >= patternCompanyId.length) {
|
||||
continue;
|
||||
}
|
||||
int cid = patternCompanyId[idx];
|
||||
if (cid == 0) {
|
||||
ambiguous = true;
|
||||
} else {
|
||||
matchedIds.add(cid);
|
||||
if (matchedIds.size() > 1) {
|
||||
ambiguous = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ambiguous) {
|
||||
// Keep scanning to consume input, but we can early-exit for performance.
|
||||
// For refresh use-case, ambiguous means we won't update this row.
|
||||
// Still, continue is safe; break reduces CPU.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ambiguous) {
|
||||
return new MatchResult(null, true);
|
||||
}
|
||||
if (matchedIds.size() == 1) {
|
||||
return new MatchResult(matchedIds.iterator().next(), false);
|
||||
}
|
||||
return new MatchResult(null, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,7 +159,8 @@ public class CreditMediationController extends BaseController {
|
||||
User loginUser = getLoginUser();
|
||||
Integer currentTenantId = loginUser != null ? loginUser.getTenantId() : null;
|
||||
|
||||
BatchImportSupport.CompanyIdRefreshStats stats = batchImportSupport.refreshCompanyIdByCompanyName(
|
||||
// Special: otherPartiesThirdParty may contain multiple roles + names; match if any company name is contained in the text.
|
||||
BatchImportSupport.CompanyIdRefreshStats stats = batchImportSupport.refreshCompanyIdByCompanyNameContainedInText(
|
||||
creditMediationService,
|
||||
creditCompanyService,
|
||||
currentTenantId,
|
||||
@@ -167,13 +168,14 @@ public class CreditMediationController extends BaseController {
|
||||
limit,
|
||||
CreditMediation::getId,
|
||||
CreditMediation::setId,
|
||||
CreditMediation::getAppellee,
|
||||
CreditMediation::getCompanyId,
|
||||
CreditMediation::setCompanyId,
|
||||
CreditMediation::getHasData,
|
||||
CreditMediation::setHasData,
|
||||
CreditMediation::getTenantId,
|
||||
CreditMediation::new
|
||||
CreditMediation::new,
|
||||
CreditMediation::getOtherPartiesThirdParty,
|
||||
CreditMediation::getAppellee
|
||||
);
|
||||
|
||||
if (!stats.anyDataRead) {
|
||||
|
||||
Reference in New Issue
Block a user