切换知识库工具栏

This commit is contained in:
2026-01-22 10:34:19 +08:00
parent 9689ce9e23
commit bdf4cde39f
10 changed files with 1723 additions and 1693 deletions

View File

@@ -14,7 +14,7 @@ import com.gxwebsoft.ai.config.KnowledgeBaseConfig;
import com.gxwebsoft.ai.entity.AiCloudFile; import com.gxwebsoft.ai.entity.AiCloudFile;
import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory; import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory;
import com.gxwebsoft.ai.service.AiCloudFileService; import com.gxwebsoft.ai.service.AiCloudFileService;
import com.gxwebsoft.ai.util.KnowledgeBaseUtil; import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil;
import com.gxwebsoft.common.core.context.TenantContext; import com.gxwebsoft.common.core.context.TenantContext;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
@@ -146,7 +146,7 @@ public abstract class AbstractAuditContentService {
Client client = clientFactory.createClient(); Client client = clientFactory.createClient();
for (String query : queries) { for (String query : queries) {
try { try {
RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query); RetrieveResponse resp = AiCloudKnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query);
List<RetrieveResponseBodyDataNodes> nodes = Optional.ofNullable(resp) List<RetrieveResponseBodyDataNodes> nodes = Optional.ofNullable(resp)
.map(RetrieveResponse::getBody) .map(RetrieveResponse::getBody)
.map(RetrieveResponseBody::getData) .map(RetrieveResponseBody::getData)

View File

@@ -9,7 +9,7 @@ import com.alibaba.fastjson.JSONObject;
import com.gxwebsoft.ai.config.KnowledgeBaseConfig; import com.gxwebsoft.ai.config.KnowledgeBaseConfig;
import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory; import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory;
import com.gxwebsoft.ai.service.AuditReportService; import com.gxwebsoft.ai.service.AuditReportService;
import com.gxwebsoft.ai.util.KnowledgeBaseUtil; import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil;
import com.gxwebsoft.pwl.entity.PwlProjectLibrary; import com.gxwebsoft.pwl.entity.PwlProjectLibrary;
import com.gxwebsoft.pwl.service.PwlProjectLibraryService; import com.gxwebsoft.pwl.service.PwlProjectLibraryService;
@@ -319,7 +319,7 @@ public class AuditReportServiceImpl implements AuditReportService {
try { try {
Client client = clientFactory.createClient(); Client client = clientFactory.createClient();
RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query); RetrieveResponse resp = AiCloudKnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query);
if (resp.getBody() != null && resp.getBody().getData() != null if (resp.getBody() != null && resp.getBody().getData() != null
&& resp.getBody().getData().getNodes() != null) { && resp.getBody().getData().getNodes() != null) {

View File

@@ -1,207 +1,207 @@
package com.gxwebsoft.ai.service.impl; //package com.gxwebsoft.ai.service.impl;
//
import com.aliyun.bailian20231229.Client; //import com.aliyun.bailian20231229.Client;
import com.aliyun.bailian20231229.models.CreateIndexResponse; //import com.aliyun.bailian20231229.models.CreateIndexResponse;
import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse; //import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
import com.aliyun.bailian20231229.models.DeleteIndexResponse; //import com.aliyun.bailian20231229.models.DeleteIndexResponse;
import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse; //import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse;
import com.aliyun.bailian20231229.models.ListIndicesResponse; //import com.aliyun.bailian20231229.models.ListIndicesResponse;
import com.aliyun.bailian20231229.models.RetrieveResponse; //import com.aliyun.bailian20231229.models.RetrieveResponse;
import com.aliyun.bailian20231229.models.RetrieveResponseBody.RetrieveResponseBodyDataNodes; //import com.aliyun.bailian20231229.models.RetrieveResponseBody.RetrieveResponseBodyDataNodes;
import com.gxwebsoft.ai.config.KnowledgeBaseConfig; //import com.gxwebsoft.ai.config.KnowledgeBaseConfig;
import com.gxwebsoft.ai.constants.KnowledgeBaseConstants; //import com.gxwebsoft.ai.constants.KnowledgeBaseConstants;
import com.gxwebsoft.ai.dto.KnowledgeBaseRequest; //import com.gxwebsoft.ai.dto.KnowledgeBaseRequest;
import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory; //import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory;
import com.gxwebsoft.ai.service.KnowledgeBaseService; //import com.gxwebsoft.ai.service.KnowledgeBaseService;
import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil; //import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil;
import com.gxwebsoft.ai.util.KnowledgeBaseUploader; //import com.gxwebsoft.ai.util.KnowledgeBaseUploader;
import com.gxwebsoft.ai.util.KnowledgeBaseUtil; //import com.gxwebsoft.ai.util.KnowledgeBaseUtil;
import cn.hutool.core.util.StrUtil; //import cn.hutool.core.util.StrUtil;
import org.springframework.beans.factory.annotation.Autowired; //import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async; //import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service; //import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile; //import org.springframework.web.multipart.MultipartFile;
//
import java.time.LocalDateTime; //import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter; //import java.time.format.DateTimeFormatter;
import java.util.Arrays; //import java.util.Arrays;
import java.util.HashMap; //import java.util.HashMap;
import java.util.LinkedHashSet; //import java.util.LinkedHashSet;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
import java.util.Set; //import java.util.Set;
//
@Service //@Service
public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { //public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
//
@Autowired // @Autowired
private KnowledgeBaseConfig config; // private KnowledgeBaseConfig config;
//
@Autowired // @Autowired
private KnowledgeBaseClientFactory clientFactory; // private KnowledgeBaseClientFactory clientFactory;
//
@Override // @Override
public Set<String> queryKnowledgeBase(KnowledgeBaseRequest req) { // public Set<String> queryKnowledgeBase(KnowledgeBaseRequest req) {
return queryKnowledgeBase(req.getKbId(), req.getQuery(), req.getTopK(), req.getFormCommit()); // return queryKnowledgeBase(req.getKbId(), req.getQuery(), req.getTopK(), req.getFormCommit());
} // }
//
@Override // @Override
public Set<String> queryKnowledgeBase(String kbId, String query, Integer topK, Integer formCommit) { // public Set<String> queryKnowledgeBase(String kbId, String query, Integer topK, Integer formCommit) {
Set<String> result = new LinkedHashSet<>(); // Set<String> result = new LinkedHashSet<>();
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
List<String> keyWords = Arrays.asList(KnowledgeBaseConstants.KEY_WORDS); // List<String> keyWords = Arrays.asList(KnowledgeBaseConstants.KEY_WORDS);
String indexId = kbId; // String indexId = kbId;
String searchQuery = StrUtil.isEmpty(query) ? keyWords.get(formCommit) : query; // String searchQuery = StrUtil.isEmpty(query) ? keyWords.get(formCommit) : query;
Integer searchTopK = topK == null ? 10 : topK; // Integer searchTopK = topK == null ? 10 : topK;
//
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, indexId, searchQuery); // RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, indexId, searchQuery);
for (RetrieveResponseBodyDataNodes node : resp.getBody().getData().getNodes()) { // for (RetrieveResponseBodyDataNodes node : resp.getBody().getData().getNodes()) {
result.add(node.getText()); // result.add(node.getText());
if (result.size() >= searchTopK) { // if (result.size() >= searchTopK) {
break; // break;
} // }
} // }
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("查询知识库失败: " + e.getMessage(), e); // throw new RuntimeException("查询知识库失败: " + e.getMessage(), e);
} // }
return result; // return result;
} // }
//
@Override // @Override
public String createKnowledgeBase(String companyName, String companyCode) { // public String createKnowledgeBase(String companyName, String companyCode) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
try { // try {
String kbId = getKnowledgeBaseIdByName(companyCode); // String kbId = getKnowledgeBaseIdByName(companyCode);
if(StrUtil.isNotEmpty(kbId)) { // if(StrUtil.isNotEmpty(kbId)) {
return kbId; // return kbId;
} // }
//
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
CreateIndexResponse indexResponse = KnowledgeBaseUtil.createIndex(client, workspaceId, companyCode, companyName); // CreateIndexResponse indexResponse = KnowledgeBaseUtil.createIndex(client, workspaceId, companyCode, companyName);
return indexResponse.getBody().getData().getId(); // return indexResponse.getBody().getData().getId();
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("创建知识库失败: " + e.getMessage(), e); // throw new RuntimeException("创建知识库失败: " + e.getMessage(), e);
} // }
} // }
//
@Override // @Override
public String createKnowledgeBaseTemp() { // public String createKnowledgeBaseTemp() {
String code = "Temp_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("MMddHHmmssSSS")); // String code = "Temp_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("MMddHHmmssSSS"));
return createKnowledgeBase(code, code); // return createKnowledgeBase(code, code);
} // }
//
@Override // @Override
public boolean existsKnowledgeBase(String companyCode) { // public boolean existsKnowledgeBase(String companyCode) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
ListIndicesResponse indicesResponse = KnowledgeBaseUtil.listIndices(client, workspaceId); // ListIndicesResponse indicesResponse = KnowledgeBaseUtil.listIndices(client, workspaceId);
//
return indicesResponse.getBody().getData().getIndices().stream() // return indicesResponse.getBody().getData().getIndices().stream()
.anyMatch(index -> companyCode.equals(index.getName())); // .anyMatch(index -> companyCode.equals(index.getName()));
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("检查知识库是否存在失败: " + e.getMessage(), e); // throw new RuntimeException("检查知识库是否存在失败: " + e.getMessage(), e);
} // }
} // }
//
@Override // @Override
public String getKnowledgeBaseIdByName(String companyCode) { // public String getKnowledgeBaseIdByName(String companyCode) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
ListIndicesResponse indicesResponse = KnowledgeBaseUtil.listIndices(client, workspaceId); // ListIndicesResponse indicesResponse = KnowledgeBaseUtil.listIndices(client, workspaceId);
//
return indicesResponse.getBody().getData().getIndices().stream() // return indicesResponse.getBody().getData().getIndices().stream()
.filter(index -> companyCode.equals(index.getName())) // .filter(index -> companyCode.equals(index.getName()))
.findFirst() // .findFirst()
.map(index -> index.getId()) // .map(index -> index.getId())
.orElse(""); // .orElse("");
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("查找知识库ID失败: " + e.getMessage(), e); // throw new RuntimeException("查找知识库ID失败: " + e.getMessage(), e);
} // }
} // }
//
@Override // @Override
public Map<String,Object> listDocuments(String kbId, Integer pageSize, Integer pageNumber) { // public Map<String,Object> listDocuments(String kbId, Integer pageSize, Integer pageNumber) {
Map<String,Object> ret = new HashMap<>(); // Map<String,Object> ret = new HashMap<>();
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
ListIndexDocumentsResponse indexDocumentsResponse = KnowledgeBaseUtil.listIndexDocuments(client, workspaceId, kbId, pageSize, pageNumber); // ListIndexDocumentsResponse indexDocumentsResponse = KnowledgeBaseUtil.listIndexDocuments(client, workspaceId, kbId, pageSize, pageNumber);
ret.put("data", indexDocumentsResponse.getBody().getData().getDocuments()); // ret.put("data", indexDocumentsResponse.getBody().getData().getDocuments());
ret.put("total", indexDocumentsResponse.getBody().getData().getTotalCount()); // ret.put("total", indexDocumentsResponse.getBody().getData().getTotalCount());
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("查询知识库下的文档列表失败: " + e.getMessage(), e); // throw new RuntimeException("查询知识库下的文档列表失败: " + e.getMessage(), e);
} // }
return ret; // return ret;
} // }
//
@Override // @Override
public boolean deleteIndex(String kbId) { // public boolean deleteIndex(String kbId) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
DeleteIndexResponse indexDocumentResponse = KnowledgeBaseUtil.deleteIndex(client, workspaceId, kbId); // DeleteIndexResponse indexDocumentResponse = KnowledgeBaseUtil.deleteIndex(client, workspaceId, kbId);
return indexDocumentResponse.getBody().getSuccess(); // return indexDocumentResponse.getBody().getSuccess();
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("删除知识库失败: " + e.getMessage(), e); // throw new RuntimeException("删除知识库失败: " + e.getMessage(), e);
} // }
} // }
//
@Override // @Override
public boolean deleteIndexDocument(String kbId, String fileIds) { // public boolean deleteIndexDocument(String kbId, String fileIds) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
List<String> ids = StrUtil.splitTrim(fileIds, ","); // List<String> ids = StrUtil.splitTrim(fileIds, ",");
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
DeleteIndexDocumentResponse indexDocumentResponse = KnowledgeBaseUtil.deleteIndexDocument(client, workspaceId, kbId, ids); // DeleteIndexDocumentResponse indexDocumentResponse = KnowledgeBaseUtil.deleteIndexDocument(client, workspaceId, kbId, ids);
return indexDocumentResponse.getBody().getSuccess(); // return indexDocumentResponse.getBody().getSuccess();
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("删除知识库下的文档失败: " + e.getMessage(), e); // throw new RuntimeException("删除知识库下的文档失败: " + e.getMessage(), e);
} // }
} // }
//
@Override // @Override
public boolean uploadDocuments(String kbId, MultipartFile[] files) { // public boolean uploadDocuments(String kbId, MultipartFile[] files) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
int count = files.length; // int count = files.length;
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
List<String> fileIds = KnowledgeBaseUploader.uploadDocuments(client, workspaceId, kbId, files); // List<String> fileIds = KnowledgeBaseUploader.uploadDocuments(client, workspaceId, kbId, files);
//上传切片完成后删除原文档(释放云空间) // //上传切片完成后删除原文档(释放云空间)
for(String fileId : fileIds) { // for(String fileId : fileIds) {
KnowledgeBaseUtil.deleteAppDocument(client, workspaceId, fileId); // KnowledgeBaseUtil.deleteAppDocument(client, workspaceId, fileId);
} // }
return !fileIds.isEmpty() && fileIds.size() == count; // return !fileIds.isEmpty() && fileIds.size() == count;
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("上传文档到知识库失败: " + e.getMessage(), e); // throw new RuntimeException("上传文档到知识库失败: " + e.getMessage(), e);
} // }
} // }
//
@Async // @Async
@Override // @Override
public void submitDocuments(String kbId, String fileId) { // public void submitDocuments(String kbId, String fileId) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileId); // AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileId);
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e); // throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e);
} // }
} // }
//
@Override // @Override
public void submitDocuments(String kbId, List<String> fileIds) { // public void submitDocuments(String kbId, List<String> fileIds) {
String workspaceId = config.getWorkspaceId(); // String workspaceId = config.getWorkspaceId();
try { // try {
Client client = clientFactory.createClient(); // Client client = clientFactory.createClient();
AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileIds); // AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileIds);
} catch (Exception e) { // } catch (Exception e) {
throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e); // throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e);
} // }
} // }
//
//
} //}

View File

@@ -1,10 +1,12 @@
package com.gxwebsoft.ai.util; package com.gxwebsoft.ai.util;
import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import com.alibaba.fastjson.JSON;
import com.aliyun.bailian20231229.models.CreateIndexResponse; import com.aliyun.bailian20231229.models.CreateIndexResponse;
import com.aliyun.bailian20231229.models.DeleteFileResponse; import com.aliyun.bailian20231229.models.DeleteFileResponse;
import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse; import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
@@ -37,7 +39,35 @@ public class AiCloudKnowledgeBaseUtil {
RetrieveRequest retrieveRequest = new RetrieveRequest(); RetrieveRequest retrieveRequest = new RetrieveRequest();
retrieveRequest.setIndexId(indexId); retrieveRequest.setIndexId(indexId);
retrieveRequest.setQuery(query); retrieveRequest.setQuery(query);
retrieveRequest.setDenseSimilarityTopK(null); retrieveRequest.setDenseSimilarityTopK(100);
retrieveRequest.setSparseSimilarityTopK(100);
retrieveRequest.setEnableReranking(false);//开启耗费巨量token
RuntimeOptions runtime = new RuntimeOptions();
return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
}
/**
* 在指定的知识库中检索信息。
*
* @param client 客户端对象bailian20231229Client
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param query 检索查询语句
* @param filesIds 指定文件
* @return 阿里云百炼服务的响应
*/
public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query, List<String> filesIds) throws Exception {
RetrieveRequest retrieveRequest = new RetrieveRequest();
retrieveRequest.setIndexId(indexId);
retrieveRequest.setQuery(query);
retrieveRequest.setDenseSimilarityTopK(100);
retrieveRequest.setSparseSimilarityTopK(100);
retrieveRequest.setEnableReranking(false);//开启耗费巨量token
List<Map<String, String>> searchFilters = new ArrayList<>();
Map<String, String> searchFiltersTags = new HashMap<>();
searchFiltersTags.put("tags", JSON.toJSONString(filesIds));
searchFilters.add(searchFiltersTags);
retrieveRequest.setSearchFilters(searchFilters);
RuntimeOptions runtime = new RuntimeOptions(); RuntimeOptions runtime = new RuntimeOptions();
return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime); return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
} }

View File

@@ -1,384 +1,384 @@
package com.gxwebsoft.ai.util; //package com.gxwebsoft.ai.util;
//
import com.aliyun.bailian20231229.models.*; //import com.aliyun.bailian20231229.models.*;
import com.fasterxml.jackson.databind.ObjectMapper; //import com.fasterxml.jackson.databind.ObjectMapper;
//
import java.io.File; //import java.io.File;
import java.io.FileInputStream; //import java.io.FileInputStream;
import java.net.HttpURLConnection; //import java.net.HttpURLConnection;
import java.net.URL; //import java.net.URL;
import java.security.MessageDigest; //import java.security.MessageDigest;
import java.util.*; //import java.util.*;
import java.util.concurrent.TimeUnit; //import java.util.concurrent.TimeUnit;
//
/** ///**
* 创建知识库 // * 创建知识库
* @author GIIT-YC // * @author GIIT-YC
* // *
*/ // */
public class KnowledgeBaseCreate { //public class KnowledgeBaseCreate {
//
String ALIBABA_CLOUD_ACCESS_KEY_ID = "LTAI5tD5YRKuxWz6Eg7qrM4P"; // String ALIBABA_CLOUD_ACCESS_KEY_ID = "LTAI5tD5YRKuxWz6Eg7qrM4P";
String ALIBABA_CLOUD_ACCESS_KEY_SECRET = "bO8TBDXflOwbtSKimPpG8XrJnyzgTk"; // String ALIBABA_CLOUD_ACCESS_KEY_SECRET = "bO8TBDXflOwbtSKimPpG8XrJnyzgTk";
String WORKSPACE_ID = "llm-4pf5auwewoz34zqu"; // String WORKSPACE_ID = "llm-4pf5auwewoz34zqu";
//
/** // /**
* 检查并提示设置必要的环境变量。 // * 检查并提示设置必要的环境变量。
* // *
* @return true 如果所有必需的环境变量都已设置,否则 false // * @return true 如果所有必需的环境变量都已设置,否则 false
*/ // */
public static boolean checkEnvironmentVariables() { // public static boolean checkEnvironmentVariables() {
Map<String, String> requiredVars = new HashMap<>(); // Map<String, String> requiredVars = new HashMap<>();
requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID"); // requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码"); // requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID"); // requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");
//
List<String> missingVars = new ArrayList<>(); // List<String> missingVars = new ArrayList<>();
for (Map.Entry<String, String> entry : requiredVars.entrySet()) { // for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
String value = System.getenv(entry.getKey()); // String value = System.getenv(entry.getKey());
if (value == null || value.isEmpty()) { // if (value == null || value.isEmpty()) {
missingVars.add(entry.getKey()); // missingVars.add(entry.getKey());
System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")"); // System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
} // }
} // }
//
return missingVars.isEmpty(); // return missingVars.isEmpty();
} // }
//
/** // /**
* 计算文档的MD5值。 // * 计算文档的MD5值。
* // *
* @param filePath 文档本地路径 // * @param filePath 文档本地路径
* @return 文档的MD5值 // * @return 文档的MD5值
* @throws Exception 如果计算过程中发生错误 // * @throws Exception 如果计算过程中发生错误
*/ // */
public static String calculateMD5(String filePath) throws Exception { // public static String calculateMD5(String filePath) throws Exception {
MessageDigest md = MessageDigest.getInstance("MD5"); // MessageDigest md = MessageDigest.getInstance("MD5");
try (FileInputStream fis = new FileInputStream(filePath)) { // try (FileInputStream fis = new FileInputStream(filePath)) {
byte[] buffer = new byte[4096]; // byte[] buffer = new byte[4096];
int bytesRead; // int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) { // while ((bytesRead = fis.read(buffer)) != -1) {
md.update(buffer, 0, bytesRead); // md.update(buffer, 0, bytesRead);
} // }
} // }
StringBuilder sb = new StringBuilder(); // StringBuilder sb = new StringBuilder();
for (byte b : md.digest()) { // for (byte b : md.digest()) {
sb.append(String.format("%02x", b & 0xff)); // sb.append(String.format("%02x", b & 0xff));
} // }
return sb.toString(); // return sb.toString();
} // }
//
/** // /**
* 获取文档大小(以字节为单位)。 // * 获取文档大小(以字节为单位)。
* // *
* @param filePath 文档本地路径 // * @param filePath 文档本地路径
* @return 文档大小(以字节为单位) // * @return 文档大小(以字节为单位)
*/ // */
public static String getFileSize(String filePath) { // public static String getFileSize(String filePath) {
File file = new File(filePath); // File file = new File(filePath);
long fileSize = file.length(); // long fileSize = file.length();
return String.valueOf(fileSize); // return String.valueOf(fileSize);
} // }
//
/** // /**
* 初始化客户端Client // * 初始化客户端Client
* // *
* @return 配置好的客户端对象 // * @return 配置好的客户端对象
*/ // */
public static com.aliyun.bailian20231229.Client createClient() throws Exception { // public static com.aliyun.bailian20231229.Client createClient() throws Exception {
com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config() // com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
.setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")) // .setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"))
.setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")); // .setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));
// 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。 // // 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。
config.endpoint = "bailian.cn-beijing.aliyuncs.com"; // config.endpoint = "bailian.cn-beijing.aliyuncs.com";
return new com.aliyun.bailian20231229.Client(config); // return new com.aliyun.bailian20231229.Client(config);
} // }
//
/** // /**
* 申请文档上传租约。 // * 申请文档上传租约。
* // *
* @param client 客户端对象 // * @param client 客户端对象
* @param categoryId 类目ID // * @param categoryId 类目ID
* @param fileName 文档名称 // * @param fileName 文档名称
* @param fileMd5 文档的MD5值 // * @param fileMd5 文档的MD5值
* @param fileSize 文档大小(以字节为单位) // * @param fileSize 文档大小(以字节为单位)
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象 // * @return 阿里云百炼服务的响应对象
*/ // */
public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId, // public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId,
String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception { // String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>(); // Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest(); // com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
applyFileUploadLeaseRequest.setFileName(fileName); // applyFileUploadLeaseRequest.setFileName(fileName);
applyFileUploadLeaseRequest.setMd5(fileMd5); // applyFileUploadLeaseRequest.setMd5(fileMd5);
applyFileUploadLeaseRequest.setSizeInBytes(fileSize); // applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); // com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null; // ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null;
applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId, // applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId,
applyFileUploadLeaseRequest, headers, runtime); // applyFileUploadLeaseRequest, headers, runtime);
return applyFileUploadLeaseResponse; // return applyFileUploadLeaseResponse;
} // }
//
/** // /**
* 上传文档到临时存储。 // * 上传文档到临时存储。
* // *
* @param preSignedUrl 上传租约中的 URL // * @param preSignedUrl 上传租约中的 URL
* @param headers 上传请求的头部 // * @param headers 上传请求的头部
* @param filePath 文档本地路径 // * @param filePath 文档本地路径
* @throws Exception 如果上传过程中发生错误 // * @throws Exception 如果上传过程中发生错误
*/ // */
public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception { // public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception {
File file = new File(filePath); // File file = new File(filePath);
if (!file.exists() || !file.isFile()) { // if (!file.exists() || !file.isFile()) {
throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath); // throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
} // }
//
try (FileInputStream fis = new FileInputStream(file)) { // try (FileInputStream fis = new FileInputStream(file)) {
URL url = new URL(preSignedUrl); // URL url = new URL(preSignedUrl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection(); // HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("PUT"); // conn.setRequestMethod("PUT");
conn.setDoOutput(true); // conn.setDoOutput(true);
//
// 设置上传请求头 // // 设置上传请求头
conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra")); // conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
conn.setRequestProperty("Content-Type", headers.get("Content-Type")); // conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
//
// 分块读取并上传文档 // // 分块读取并上传文档
byte[] buffer = new byte[4096]; // byte[] buffer = new byte[4096];
int bytesRead; // int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) { // while ((bytesRead = fis.read(buffer)) != -1) {
conn.getOutputStream().write(buffer, 0, bytesRead); // conn.getOutputStream().write(buffer, 0, bytesRead);
} // }
//
int responseCode = conn.getResponseCode(); // int responseCode = conn.getResponseCode();
if (responseCode != 200) { // if (responseCode != 200) {
throw new RuntimeException("上传失败: " + responseCode); // throw new RuntimeException("上传失败: " + responseCode);
} // }
} // }
} // }
//
/** // /**
* 将文档添加到类目中。 // * 将文档添加到类目中。
* // *
* @param client 客户端对象 // * @param client 客户端对象
* @param leaseId 租约ID // * @param leaseId 租约ID
* @param parser 用于文档的解析器 // * @param parser 用于文档的解析器
* @param categoryId 类目ID // * @param categoryId 类目ID
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象 // * @return 阿里云百炼服务的响应对象
*/ // */
public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser, // public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser,
String categoryId, String workspaceId) throws Exception { // String categoryId, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>(); // Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest(); // com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
addFileRequest.setLeaseId(leaseId); // addFileRequest.setLeaseId(leaseId);
addFileRequest.setParser(parser); // addFileRequest.setParser(parser);
addFileRequest.setCategoryId(categoryId); // addFileRequest.setCategoryId(categoryId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); // com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime); // return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
} // }
//
/**
 * Queries the basic information of a document.
 *
 * @param client      client used to send the request
 * @param workspaceId workspace ID passed to the API call
 * @param fileId      ID of the document to describe
 * @return the response returned by {@code describeFileWithOptions}
 * @throws Exception propagated from the SDK call
 */
public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId,
        String fileId) throws Exception {
    com.aliyun.teautil.models.RuntimeOptions defaultRuntime = new com.aliyun.teautil.models.RuntimeOptions();
    Map<String, String> noExtraHeaders = new HashMap<>();
    DescribeFileResponse fileInfo = client.describeFileWithOptions(workspaceId, fileId, noExtraHeaders,
            defaultRuntime);
    return fileInfo;
}
//
/**
 * Creates (initializes) a knowledge base containing a single document.
 *
 * @param client        client used to send the request
 * @param workspaceId   workspace ID passed to the API call
 * @param fileId        document ID; sent as the only entry of the request's document ID list
 * @param name          knowledge base name placed on the request
 * @param structureType structure type placed on the request
 * @param sourceType    source type placed on the request
 * @param sinkType      sink (vector storage) type placed on the request
 * @return the response returned by {@code createIndexWithOptions}
 * @throws Exception propagated from the SDK call
 */
public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
        String fileId, String name, String structureType, String sourceType, String sinkType) throws Exception {
    // Build the request fluently; the SDK model setters return the model itself.
    com.aliyun.bailian20231229.models.CreateIndexRequest request =
            new com.aliyun.bailian20231229.models.CreateIndexRequest()
                    .setStructureType(structureType)
                    .setName(name)
                    .setSourceType(sourceType)
                    .setSinkType(sinkType)
                    .setDocumentIds(Collections.singletonList(fileId));
    Map<String, String> noExtraHeaders = new HashMap<>();
    com.aliyun.teautil.models.RuntimeOptions defaultRuntime = new com.aliyun.teautil.models.RuntimeOptions();
    return client.createIndexWithOptions(workspaceId, request, noExtraHeaders, defaultRuntime);
}
//
/**
 * Submits an index job for a knowledge base.
 *
 * @param client      client used to send the request
 * @param workspaceId workspace ID passed to the API call
 * @param indexId     knowledge base ID placed on the request
 * @return the response returned by {@code submitIndexJobWithOptions}
 * @throws Exception propagated from the SDK call
 */
public static SubmitIndexJobResponse submitIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
        String indexId) throws Exception {
    com.aliyun.bailian20231229.models.SubmitIndexJobRequest request =
            new com.aliyun.bailian20231229.models.SubmitIndexJobRequest().setIndexId(indexId);
    Map<String, String> noExtraHeaders = new HashMap<>();
    com.aliyun.teautil.models.RuntimeOptions defaultRuntime = new com.aliyun.teautil.models.RuntimeOptions();
    return client.submitIndexJobWithOptions(workspaceId, request, noExtraHeaders, defaultRuntime);
}
//
/**
 * Queries the status of an index job.
 *
 * @param client      client used to send the request
 * @param workspaceId workspace ID passed to the API call
 * @param jobId       job ID placed on the request
 * @param indexId     knowledge base ID placed on the request
 * @return the response returned by {@code getIndexJobStatusWithOptions}
 * @throws Exception propagated from the SDK call
 */
public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client,
        String workspaceId, String jobId, String indexId) throws Exception {
    com.aliyun.bailian20231229.models.GetIndexJobStatusRequest request =
            new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest()
                    .setIndexId(indexId)
                    .setJobId(jobId);
    Map<String, String> noExtraHeaders = new HashMap<>();
    com.aliyun.teautil.models.RuntimeOptions defaultRuntime = new com.aliyun.teautil.models.RuntimeOptions();
    return client.getIndexJobStatusWithOptions(workspaceId, request, noExtraHeaders, defaultRuntime);
}
//
/**
 * Creates a knowledge base from a local document.
 *
 * <p>The method runs these steps in order: create the client, collect the file name / MD5 / size,
 * apply for an upload lease, upload the file, add the file to the category, poll the file status
 * until it is {@code PARSE_SUCCESS}, create the index, submit the index job, and poll the job
 * status until it is {@code COMPLETED}. Both polling loops sleep 5 seconds between requests.
 *
 * <p>Progress and errors are printed to standard output.
 *
 * @param filePath    local path of the document to upload
 * @param workspaceId workspace ID passed to every API call
 * @param name        name of the knowledge base to create
 * @return the knowledge base (index) ID on success; {@code null} if the document reaches a status
 *         other than INIT / PARSING / PARSE_SUCCESS, if the index job reports {@code FAILED} or
 *         no status, if the thread is interrupted, or if any step throws
 */
public static String createKnowledgeBase(String filePath, String workspaceId, String name) {
    // Default request settings.
    String categoryId = "default";
    String parser = "DASHSCOPE_DOCMIND";
    String sourceType = "DATA_CENTER_FILE";
    String structureType = "unstructured";
    String sinkType = "DEFAULT";
    try {
        // Step 1: initialize the client.
        System.out.println("步骤1初始化Client");
        com.aliyun.bailian20231229.Client client = createClient();

        // Step 2: collect document information.
        System.out.println("步骤2准备文档信息");
        String fileName = new File(filePath).getName();
        String fileMd5 = calculateMD5(filePath);
        String fileSize = getFileSize(filePath);

        // Step 3: apply for an upload lease.
        System.out.println("步骤3向阿里云百炼申请上传租约");
        ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize,
                workspaceId);
        String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
        String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
        Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();

        // Step 4: upload the document.
        System.out.println("步骤4上传文档到阿里云百炼");
        // The lease headers arrive as an untyped Object; round-trip through JSON to get a key/value map.
        ObjectMapper mapper = new ObjectMapper();
        @SuppressWarnings("unchecked") // readValue(..., Map.class) yields a raw Map; uploadFile expects String entries
        Map<String, String> uploadHeadersMap = (Map<String, String>) mapper
                .readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
        uploadFile(uploadUrl, uploadHeadersMap, filePath);

        // Step 5: add the document to the category.
        System.out.println("步骤5将文档添加到阿里云百炼服务器");
        AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
        String fileId = addResponse.getBody().getData().getFileId();

        // Step 6: wait for the document to be parsed.
        System.out.println("步骤6检查阿里云百炼中的文档状态");
        while (true) {
            DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
            String status = describeResponse.getBody().getData().getStatus();
            System.out.println("当前文档状态:" + status);

            // Constant-first comparisons so a null status falls through to the "unknown" branch.
            if ("INIT".equals(status)) {
                System.out.println("文档待解析,请稍候...");
            } else if ("PARSING".equals(status)) {
                System.out.println("文档解析中,请稍候...");
            } else if ("PARSE_SUCCESS".equals(status)) {
                System.out.println("文档解析完成!");
                break;
            } else {
                System.out.println("未知的文档状态:" + status + ",请联系技术支持。");
                return null;
            }
            TimeUnit.SECONDS.sleep(5);
        }

        // Step 7: create the knowledge base.
        System.out.println("步骤7在阿里云百炼中创建知识库");
        CreateIndexResponse indexResponse = createIndex(client, workspaceId, fileId, name, structureType,
                sourceType, sinkType);
        String indexId = indexResponse.getBody().getData().getId();

        // Step 8: submit the index job.
        System.out.println("步骤8向阿里云百炼提交索引任务");
        SubmitIndexJobResponse submitResponse = submitIndex(client, workspaceId, indexId);
        String jobId = submitResponse.getBody().getData().getId();

        // Step 9: wait for the index job to finish.
        System.out.println("步骤9获取阿里云百炼索引任务状态");
        while (true) {
            GetIndexJobStatusResponse getStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
            String status = getStatusResponse.getBody().getData().getStatus();
            System.out.println("当前索引任务状态:" + status);

            if ("COMPLETED".equals(status)) {
                break;
            }
            // A failed job (or a response without a status) will never become COMPLETED;
            // stop here instead of polling forever.
            if (status == null || "FAILED".equals(status)) {
                System.out.println("索引任务失败或状态未知:" + status);
                return null;
            }
            TimeUnit.SECONDS.sleep(5);
        }

        System.out.println("阿里云百炼知识库创建成功!");
        return indexId;

    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can observe the interruption.
        Thread.currentThread().interrupt();
        System.out.println("发生错误:" + e.getMessage());
        e.printStackTrace();
        return null;
    } catch (Exception e) {
        System.out.println("发生错误:" + e.getMessage());
        e.printStackTrace();
        return null;
    }
}
//
/**
 * Command-line entry point: checks the environment, asks for a document path and a knowledge
 * base name on standard input, creates the knowledge base, and prints its ID when one is returned.
 */
public static void main(String[] args) {
    if (!checkEnvironmentVariables()) {
        return;
    }
    Scanner console = new Scanner(System.in);

    System.out.print("请输入您需要上传文档的实际本地路径以Linux为例/xxx/xxx/阿里云百炼系列手机产品介绍.docx");
    String localPath = console.nextLine();

    System.out.print("请为您的知识库输入一个名称:");
    String knowledgeBaseName = console.nextLine();

    String createdIndexId = createKnowledgeBase(localPath, System.getenv("WORKSPACE_ID"), knowledgeBaseName);
    if (createdIndexId == null) {
        // createKnowledgeBase has already reported the failure.
        return;
    }
    System.out.println("知识库ID: " + createdIndexId);
}
} //}

View File

@@ -1,145 +1,145 @@
package com.gxwebsoft.ai.util; //package com.gxwebsoft.ai.util;
//
import com.aliyun.bailian20231229.models.DeleteIndexResponse; //import com.aliyun.bailian20231229.models.DeleteIndexResponse;
import com.aliyun.bailian20231229.models.ListIndicesResponse; //import com.aliyun.bailian20231229.models.ListIndicesResponse;
import com.fasterxml.jackson.databind.ObjectMapper; //import com.fasterxml.jackson.databind.ObjectMapper;
//
import java.util.*; //import java.util.*;
//
/**
 * Command-line helper for listing and deleting knowledge bases.
 *
 * @author GIIT-YC
 */
public class KnowledgeBaseManage {

    // SECURITY: these values used to be hard-coded in source. They are now read from the same
    // environment variables that checkEnvironmentVariables() validates. The previously committed
    // key pair must be treated as leaked and rotated.
    String ALIBABA_CLOUD_ACCESS_KEY_ID = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
    String ALIBABA_CLOUD_ACCESS_KEY_SECRET = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    String WORKSPACE_ID = System.getenv("WORKSPACE_ID");

    /**
     * Checks that the required environment variables are set and prints a message for each
     * one that is missing or empty.
     *
     * @return {@code true} if every required variable has a non-empty value, otherwise {@code false}
     */
    public static boolean checkEnvironmentVariables() {
        Map<String, String> requiredVars = new HashMap<>();
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
        requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");

        boolean allPresent = true;
        for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
            String value = System.getenv(entry.getKey());
            if (value == null || value.isEmpty()) {
                allPresent = false;
                System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
            }
        }
        return allPresent;
    }

    /**
     * Creates and configures the client.
     *
     * <p>Credentials are supplied by a no-argument {@code com.aliyun.credentials.Client}; no key
     * is passed explicitly here.
     *
     * @return the configured client
     * @throws Exception propagated from the SDK constructors
     */
    public static com.aliyun.bailian20231229.Client createClient() throws Exception {
        com.aliyun.credentials.Client credential = new com.aliyun.credentials.Client();
        com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
                .setCredential(credential);
        // Public-cloud, public-network endpoint; replace as needed.
        config.endpoint = "bailian.cn-beijing.aliyuncs.com";
        return new com.aliyun.bailian20231229.Client(config);
    }

    /**
     * Lists the knowledge bases of a workspace.
     *
     * @param client      client used to send the request
     * @param workspaceId workspace ID passed to the API call
     * @return the response returned by {@code listIndicesWithOptions}
     * @throws Exception propagated from the SDK call
     */
    public static ListIndicesResponse listIndices(com.aliyun.bailian20231229.Client client, String workspaceId)
            throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.bailian20231229.models.ListIndicesRequest listIndicesRequest = new com.aliyun.bailian20231229.models.ListIndicesRequest();
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.listIndicesWithOptions(workspaceId, listIndicesRequest, headers, runtime);
    }

    /**
     * Permanently deletes the given knowledge base.
     *
     * @param client      client used to send the request
     * @param workspaceId workspace ID passed to the API call
     * @param indexId     knowledge base ID placed on the request
     * @return the response returned by {@code deleteIndexWithOptions}
     * @throws Exception propagated from the SDK call
     */
    public static DeleteIndexResponse deleteIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
            String indexId) throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.bailian20231229.models.DeleteIndexRequest deleteIndexRequest = new com.aliyun.bailian20231229.models.DeleteIndexRequest();
        deleteIndexRequest.setIndexId(indexId);
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.deleteIndexWithOptions(workspaceId, deleteIndexRequest, headers, runtime);
    }

    /**
     * Command-line entry point: option 1 prints the workspace's knowledge bases as JSON,
     * option 2 deletes a knowledge base after an explicit y/n confirmation.
     */
    public static void main(String[] args) {
        if (!checkEnvironmentVariables()) {
            System.out.println("环境变量校验未通过。");
            return;
        }

        try {
            Scanner scanner = new Scanner(System.in);
            System.out.print("请选择要执行的操作:\n1. 查看知识库\n2. 删除知识库\n请输入选项1或2");
            String startOption = scanner.nextLine();
            com.aliyun.bailian20231229.Client client = createClient();
            String workspaceId = System.getenv("WORKSPACE_ID");
            if ("1".equals(startOption)) {
                // List knowledge bases.
                System.out.println("\n执行查看知识库");
                ListIndicesResponse response = listIndices(client, workspaceId);
                // Serialize the response data to a JSON string for display.
                ObjectMapper mapper = new ObjectMapper();
                String result = mapper.writeValueAsString(response.getBody().getData());
                System.out.println(result);
            } else if ("2".equals(startOption)) {
                System.out.println("\n执行删除知识库");
                // The ID is the Data.Id returned by CreateIndex; it is also shown in the console.
                System.out.print("请输入知识库ID");
                String indexId = scanner.nextLine();
                // Ask for confirmation before the irreversible delete.
                boolean confirm = false;
                while (!confirm) {
                    System.out.print("您确定要永久性删除该知识库 " + indexId + " 吗?(y/n): ");
                    String input = scanner.nextLine().trim().toLowerCase();
                    if (input.equals("y")) {
                        confirm = true;
                    } else if (input.equals("n")) {
                        System.out.println("已取消删除操作。");
                        return;
                    } else {
                        System.out.println("无效输入,请输入 y 或 n。");
                    }
                }
                DeleteIndexResponse resp = deleteIndex(client, workspaceId, indexId);
                // Constant-first comparison: a response without a status is reported as an error
                // with the full body instead of failing with a NullPointerException.
                if ("200".equals(resp.getBody().getStatus())) {
                    System.out.println("知识库" + indexId + "删除成功!");
                } else {
                    ObjectMapper mapper = new ObjectMapper();
                    System.out.println("发生错误:" + mapper.writeValueAsString(resp.getBody()));
                }
            } else {
                System.out.println("无效的选项,程序退出。");
            }
        } catch (Exception e) {
            System.out.println("发生错误:" + e.getMessage());
        }
    }
}

View File

@@ -1,110 +1,110 @@
package com.gxwebsoft.ai.util; //package com.gxwebsoft.ai.util;
//
import com.aliyun.bailian20231229.models.RetrieveRequest; //import com.aliyun.bailian20231229.models.RetrieveRequest;
import com.aliyun.bailian20231229.models.RetrieveResponse; //import com.aliyun.bailian20231229.models.RetrieveResponse;
import com.aliyun.teautil.models.RuntimeOptions; //import com.aliyun.teautil.models.RuntimeOptions;
import com.fasterxml.jackson.databind.ObjectMapper; //import com.fasterxml.jackson.databind.ObjectMapper;
//
import java.util.*; //import java.util.*;
//
/**
 * Command-line helper for retrieving content from a knowledge base.
 *
 * @author GIIT-YC
 */
public class KnowledgeBaseRetrieve {

    // SECURITY: these values used to be hard-coded in source. They are now read from the same
    // environment variables that checkEnvironmentVariables() validates. The previously committed
    // key pair must be treated as leaked and rotated.
    static String ALIBABA_CLOUD_ACCESS_KEY_ID = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
    static String ALIBABA_CLOUD_ACCESS_KEY_SECRET = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    static String WORKSPACE_ID = System.getenv("WORKSPACE_ID");

    /**
     * Checks that the required environment variables are set and prints a message for each
     * one that is missing or empty.
     *
     * @return {@code true} if every required variable has a non-empty value, otherwise {@code false}
     */
    public static boolean checkEnvironmentVariables() {
        Map<String, String> requiredVars = new HashMap<>();
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
        requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");

        boolean allPresent = true;
        for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
            String value = System.getenv(entry.getKey());
            if (value == null || value.isEmpty()) {
                allPresent = false;
                System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
            }
        }
        return allPresent;
    }

    /**
     * Creates the client from the access key pair held in this class's static fields.
     *
     * @return the configured client
     * @throws Exception propagated from the SDK constructors
     */
    public static com.aliyun.bailian20231229.Client createClient() throws Exception {
        com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
                .setAccessKeyId(ALIBABA_CLOUD_ACCESS_KEY_ID)
                .setAccessKeySecret(ALIBABA_CLOUD_ACCESS_KEY_SECRET);
        // Public-cloud, public-network endpoint; replace as needed.
        config.endpoint = "bailian.cn-beijing.aliyuncs.com";
        return new com.aliyun.bailian20231229.Client(config);
    }

    /**
     * Retrieves content from the given knowledge base.
     *
     * @param client      client used to send the request
     * @param workspaceId workspace ID passed to the API call
     * @param indexId     knowledge base ID placed on the request
     * @param query       query text placed on the request
     * @return the response returned by {@code retrieveWithOptions}
     * @throws Exception propagated from the SDK call
     */
    public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
            String indexId, String query) throws Exception {
        RetrieveRequest retrieveRequest = new RetrieveRequest();
        retrieveRequest.setIndexId(indexId);
        retrieveRequest.setQuery(query);
        retrieveRequest.setDenseSimilarityTopK(null);
        RuntimeOptions runtime = new RuntimeOptions();
        return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
    }

    /**
     * Command-line entry point: asks for a knowledge base ID and a query on standard input and
     * prints the response body as JSON.
     */
    public static void main(String[] args) {
        // Credentials now come from the environment, so fail fast when they are missing.
        if (!checkEnvironmentVariables()) {
            System.out.println("环境变量校验未通过。");
            return;
        }

        try {
            // Step 1: initialize the client.
            System.out.println("步骤1创建Client");
            com.aliyun.bailian20231229.Client client = createClient();

            // Step 2: retrieve from the knowledge base.
            System.out.println("步骤2检索知识库");
            Scanner scanner = new Scanner(System.in);
            // The ID is the Data.Id returned by CreateIndex; it is also shown in the console.
            System.out.print("请输入知识库ID");
            String indexId = scanner.nextLine();
            System.out.print("请输入检索query");
            String query = scanner.nextLine();
            String workspaceId = WORKSPACE_ID;
            RetrieveResponse resp = retrieveIndex(client, workspaceId, indexId, query);

            // Serialize the response body to a JSON string for display.
            ObjectMapper mapper = new ObjectMapper();
            String result = mapper.writeValueAsString(resp.getBody());
            System.out.println(result);
        } catch (Exception e) {
            System.out.println("发生错误:" + e.getMessage());
        }
    }
}

View File

@@ -1,384 +1,384 @@
package com.gxwebsoft.ai.util; //package com.gxwebsoft.ai.util;
//
import com.aliyun.bailian20231229.models.*; //import com.aliyun.bailian20231229.models.*;
import com.aliyun.teautil.models.RuntimeOptions; //import com.aliyun.teautil.models.RuntimeOptions;
import com.fasterxml.jackson.databind.ObjectMapper; //import com.fasterxml.jackson.databind.ObjectMapper;
//
import java.io.File; //import java.io.File;
import java.io.FileInputStream; //import java.io.FileInputStream;
import java.net.HttpURLConnection; //import java.net.HttpURLConnection;
import java.net.URL; //import java.net.URL;
import java.nio.file.Paths; //import java.nio.file.Paths;
import java.security.MessageDigest; //import java.security.MessageDigest;
import java.util.*; //import java.util.*;
//
/**
 * Command-line helper that replaces one document of an Alibaba Cloud Model Studio (Bailian)
 * knowledge base with an updated local file.
 *
 * <p>The flow implemented by {@link #updateKnowledgeBase} is: request an upload lease, PUT the
 * file to the pre-signed URL, register the file in a category, wait until it is parsed, submit
 * an "add documents" index job, wait for that job, and finally delete the old document from
 * the index.
 *
 * @author GIIT-YC
 */
public class KnowledgeBaseUpdate {

    /** Delay between two consecutive status polls, in milliseconds. */
    private static final long POLL_INTERVAL_MILLIS = 5000L;

    // SECURITY: these fields used to carry a literal AccessKey pair and workspace id. Secrets
    // must never live in source control, so the values are now taken from the same environment
    // variables that createClient() and main() already rely on. The key pair that was
    // committed earlier has to be rotated in the Alibaba Cloud console.
    String ALIBABA_CLOUD_ACCESS_KEY_ID = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
    String ALIBABA_CLOUD_ACCESS_KEY_SECRET = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    String WORKSPACE_ID = System.getenv("WORKSPACE_ID");

    /**
     * Checks that every required environment variable is set and prints a hint for each
     * missing one.
     *
     * @return {@code true} when all required variables are present and non-empty
     */
    public static boolean checkEnvironmentVariables() {
        // LinkedHashMap keeps the hints in declaration order instead of hash order.
        Map<String, String> requiredVars = new LinkedHashMap<>();
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
        requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");

        List<String> missingVars = new ArrayList<>();
        for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
            String value = System.getenv(entry.getKey());
            if (value == null || value.isEmpty()) {
                missingVars.add(entry.getKey());
                System.out.println("错误：请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
            }
        }

        return missingVars.isEmpty();
    }

    /**
     * Creates a Bailian client configured from the AccessKey environment variables.
     *
     * @return the configured client
     * @throws Exception if the SDK fails to build the client
     */
    public static com.aliyun.bailian20231229.Client createClient() throws Exception {
        com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
                .setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"))
                .setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));
        // Public-cloud, public-network endpoint; replace it when another endpoint is required.
        config.endpoint = "bailian.cn-beijing.aliyuncs.com";
        return new com.aliyun.bailian20231229.Client(config);
    }

    /**
     * Computes the MD5 digest of a local file.
     *
     * @param filePath local path of the document
     * @return the digest as a lower-case hexadecimal string
     * @throws Exception if the file cannot be read or MD5 is unavailable
     */
    public static String calculateMD5(String filePath) throws Exception {
        MessageDigest md = MessageDigest.getInstance("MD5");
        try (FileInputStream fis = new FileInputStream(filePath)) {
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = fis.read(buffer)) != -1) {
                md.update(buffer, 0, bytesRead);
            }
        }
        StringBuilder sb = new StringBuilder();
        for (byte b : md.digest()) {
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /**
     * Returns the size of a local file.
     *
     * @param filePath local path of the document
     * @return the size in bytes, rendered as a decimal string
     */
    public static String getFileSize(String filePath) {
        return String.valueOf(new File(filePath).length());
    }

    /**
     * Requests a document upload lease.
     *
     * @param client      Bailian client
     * @param categoryId  category id
     * @param fileName    document name
     * @param fileMd5     MD5 of the document
     * @param fileSize    document size in bytes
     * @param workspaceId workspace id
     * @return the service response
     * @throws Exception if the service call fails
     */
    public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId,
            String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
        applyFileUploadLeaseRequest.setFileName(fileName);
        applyFileUploadLeaseRequest.setMd5(fileMd5);
        applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.applyFileUploadLeaseWithOptions(categoryId, workspaceId,
                applyFileUploadLeaseRequest, headers, runtime);
    }

    /**
     * Uploads a local file to the temporary storage location described by an upload lease.
     *
     * @param preSignedUrl URL taken from the upload lease
     * @param headers      headers taken from the upload lease; the {@code X-bailian-extra} and
     *                     {@code Content-Type} entries are forwarded
     * @param filePath     local path of the document
     * @throws IllegalArgumentException if {@code filePath} is not an existing regular file
     * @throws Exception                if the transfer fails or the server does not answer 200
     */
    public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception {
        File file = new File(filePath);
        if (!file.exists() || !file.isFile()) {
            throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
        }

        HttpURLConnection conn = (HttpURLConnection) new URL(preSignedUrl).openConnection();
        try {
            conn.setRequestMethod("PUT");
            conn.setDoOutput(true);

            // Forward the headers required by the lease.
            conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
            conn.setRequestProperty("Content-Type", headers.get("Content-Type"));

            // Stream the file in chunks; both streams are closed before the response is read.
            try (FileInputStream fis = new FileInputStream(file);
                    java.io.OutputStream out = conn.getOutputStream()) {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = fis.read(buffer)) != -1) {
                    out.write(buffer, 0, bytesRead);
                }
            }

            int responseCode = conn.getResponseCode();
            if (responseCode != 200) {
                throw new RuntimeException("上传失败: " + responseCode);
            }
        } finally {
            // Release the underlying connection on success and on failure alike.
            conn.disconnect();
        }
    }

    /**
     * Registers an uploaded document in a category.
     *
     * @param client      Bailian client
     * @param leaseId     upload lease id
     * @param parser      parser used for the document
     * @param categoryId  category id
     * @param workspaceId workspace id
     * @return the service response
     * @throws Exception if the service call fails
     */
    public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser,
            String categoryId, String workspaceId) throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
        addFileRequest.setLeaseId(leaseId);
        addFileRequest.setParser(parser);
        addFileRequest.setCategoryId(categoryId);
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
    }

    /**
     * Queries the basic information of a document.
     *
     * @param client      Bailian client
     * @param workspaceId workspace id
     * @param fileId      document id
     * @return the service response
     * @throws Exception if the service call fails
     */
    public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId,
            String fileId) throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.describeFileWithOptions(workspaceId, fileId, headers, runtime);
    }

    /**
     * Submits a job that appends an already parsed document to an unstructured knowledge base.
     *
     * @param client      Bailian client
     * @param workspaceId workspace id
     * @param indexId     knowledge base id
     * @param fileId      document id
     * @param sourceType  data source type
     * @return the service response
     * @throws Exception if the service call fails
     */
    public static SubmitIndexAddDocumentsJobResponse submitIndexAddDocumentsJob(
            com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId,
            String sourceType) throws Exception {
        Map<String, String> headers = new HashMap<>();
        SubmitIndexAddDocumentsJobRequest submitIndexAddDocumentsJobRequest = new SubmitIndexAddDocumentsJobRequest();
        submitIndexAddDocumentsJobRequest.setIndexId(indexId);
        submitIndexAddDocumentsJobRequest.setDocumentIds(Collections.singletonList(fileId));
        submitIndexAddDocumentsJobRequest.setSourceType(sourceType);
        RuntimeOptions runtime = new RuntimeOptions();
        return client.submitIndexAddDocumentsJobWithOptions(workspaceId, submitIndexAddDocumentsJobRequest, headers,
                runtime);
    }

    /**
     * Queries the status of an index job.
     *
     * @param client      Bailian client
     * @param workspaceId workspace id
     * @param jobId       job id
     * @param indexId     knowledge base id
     * @return the service response
     * @throws Exception if the service call fails
     */
    public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client,
            String workspaceId, String jobId, String indexId) throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest();
        getIndexJobStatusRequest.setIndexId(indexId);
        getIndexJobStatusRequest.setJobId(jobId);
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers, runtime);
    }

    /**
     * Permanently deletes a document from an unstructured knowledge base.
     *
     * @param client      Bailian client
     * @param workspaceId workspace id
     * @param indexId     knowledge base id
     * @param fileId      document id
     * @return the service response
     * @throws Exception if the service call fails
     */
    public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client,
            String workspaceId, String indexId, String fileId) throws Exception {
        Map<String, String> headers = new HashMap<>();
        DeleteIndexDocumentRequest deleteIndexDocumentRequest = new DeleteIndexDocumentRequest();
        deleteIndexDocumentRequest.setIndexId(indexId);
        deleteIndexDocumentRequest.setDocumentIds(Collections.singletonList(fileId));
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.deleteIndexDocumentWithOptions(workspaceId, deleteIndexDocumentRequest, headers, runtime);
    }

    /**
     * Polls the document status until parsing has finished.
     *
     * @return {@code true} once the status is {@code PARSE_SUCCESS}; {@code false} as soon as a
     *         status other than {@code INIT} or {@code PARSING} is reported
     */
    private static boolean awaitFileParsed(com.aliyun.bailian20231229.Client client, String workspaceId,
            String fileId) throws Exception {
        while (true) {
            DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
            String status = describeResponse.getBody().getData().getStatus();
            System.out.println("当前文档状态：" + status);
            if ("INIT".equals(status)) {
                System.out.println("文档待解析，请稍候...");
            } else if ("PARSING".equals(status)) {
                System.out.println("文档解析中，请稍候...");
            } else if ("PARSE_SUCCESS".equals(status)) {
                System.out.println("文档解析完成！");
                return true;
            } else {
                System.out.println("未知的文档状态：" + status + "，请联系技术支持。");
                return false;
            }
            Thread.sleep(POLL_INTERVAL_MILLIS);
        }
    }

    /**
     * Polls the index job until it reaches a terminal state.
     *
     * @return {@code true} when the job reports {@code COMPLETED}; {@code false} when it
     *         reports {@code FAILED}
     */
    private static boolean awaitIndexJobCompleted(com.aliyun.bailian20231229.Client client, String workspaceId,
            String jobId, String indexId) throws Exception {
        while (true) {
            GetIndexJobStatusResponse jobStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
            String status = jobStatusResponse.getBody().getData().getStatus();
            System.out.println("当前索引任务状态：" + status);
            if ("COMPLETED".equals(status)) {
                return true;
            }
            if ("FAILED".equals(status)) {
                // Without this branch a failed job would be polled forever.
                System.out.println("索引任务失败，知识库未更新。");
                return false;
            }
            Thread.sleep(POLL_INTERVAL_MILLIS);
        }
    }

    /**
     * Replaces a document of a knowledge base with an updated local file.
     *
     * <p>The old document is only deleted after the new one has been parsed and indexed
     * successfully, so a failure leaves the knowledge base content untouched.
     *
     * @param filePath    local path of the updated document
     * @param workspaceId workspace id
     * @param indexId     id of the knowledge base to update
     * @param oldFileId   file id of the document being replaced
     * @return the knowledge base id on success, otherwise {@code null}
     */
    public static String updateKnowledgeBase(String filePath, String workspaceId, String indexId, String oldFileId) {
        // Defaults used for every upload.
        String categoryId = "default";
        String parser = "DASHSCOPE_DOCMIND";
        String sourceType = "DATA_CENTER_FILE";
        try {
            // Step 1: create the client.
            System.out.println("步骤1创建Client");
            com.aliyun.bailian20231229.Client client = createClient();

            // Step 2: collect the metadata of the updated document.
            System.out.println("步骤2准备文档信息");
            String fileName = Paths.get(filePath).getFileName().toString();
            String fileMd5 = calculateMD5(filePath);
            String fileSize = getFileSize(filePath);

            // Step 3: request an upload lease.
            System.out.println("步骤3向阿里云百炼申请上传租约");
            ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize,
                    workspaceId);
            String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
            String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
            Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();

            // Step 4: upload the document to temporary storage. The lease headers arrive as an
            // untyped object, so they are round-tripped through JSON to obtain a key/value map.
            System.out.println("步骤4上传文档到临时存储");
            ObjectMapper mapper = new ObjectMapper();
            @SuppressWarnings("unchecked")
            Map<String, String> uploadHeadersMap = (Map<String, String>) mapper
                    .readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
            uploadFile(uploadUrl, uploadHeadersMap, filePath);

            // Step 5: register the document in the category.
            System.out.println("步骤5添加文档到类目中");
            AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
            String fileId = addResponse.getBody().getData().getFileId();

            // Step 6: wait until the new document has been parsed.
            System.out.println("步骤6检查阿里云百炼中的文档状态");
            if (!awaitFileParsed(client, workspaceId, fileId)) {
                return null;
            }

            // Step 7: submit the job that appends the document to the knowledge base.
            System.out.println("步骤7提交追加文档任务");
            SubmitIndexAddDocumentsJobResponse indexAddResponse = submitIndexAddDocumentsJob(client, workspaceId,
                    indexId, fileId, sourceType);
            String jobId = indexAddResponse.getBody().getData().getId();

            // Step 8: wait for the append job; stop before deleting anything if it failed.
            System.out.println("步骤8等待追加任务完成");
            if (!awaitIndexJobCompleted(client, workspaceId, jobId, indexId)) {
                return null;
            }

            // Step 9: delete the old document.
            System.out.println("步骤9删除旧文档");
            deleteIndexDocument(client, workspaceId, indexId, oldFileId);

            System.out.println("阿里云百炼知识库更新成功！");
            return indexId;
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller instead of silently consuming it.
            Thread.currentThread().interrupt();
            System.out.println("发生错误：" + e.getMessage());
            return null;
        } catch (Exception e) {
            System.out.println("发生错误：" + e.getMessage());
            return null;
        }
    }

    /**
     * Interactive entry point: reads the file path, knowledge base id and old file id from
     * standard input and runs {@link #updateKnowledgeBase}.
     */
    public static void main(String[] args) {
        if (!checkEnvironmentVariables()) {
            System.out.println("环境变量校验未通过。");
            return;
        }

        Scanner scanner = new Scanner(System.in);
        System.out.print("请输入您需要上传文档更新后的的实际本地路径以Linux为例/xxx/xxx/阿里云百炼系列手机产品介绍.docx");
        String filePath = scanner.nextLine();

        // The knowledge base id is the Data.Id returned by CreateIndex; it is also shown on the
        // knowledge base page of the Bailian console.
        System.out.print("请输入需要更新的知识库ID");
        String indexId = scanner.nextLine();

        // The file id is the FileId returned by AddFile; it is also available in the console
        // through the ID icon next to the file name.
        System.out.print("请输入需要更新的文档的 FileID");
        String oldFileId = scanner.nextLine();

        String workspaceId = System.getenv("WORKSPACE_ID");
        String result = updateKnowledgeBase(filePath, workspaceId, indexId, oldFileId);
        if (result != null) {
            System.out.println("知识库更新成功返回知识库ID: " + result);
        } else {
            System.out.println("知识库更新失败。");
        }
    }
}

View File

@@ -1,303 +1,303 @@
package com.gxwebsoft.ai.util; //package com.gxwebsoft.ai.util;
//
import com.aliyun.bailian20231229.Client; //import com.aliyun.bailian20231229.Client;
import com.aliyun.bailian20231229.models.*; //import com.aliyun.bailian20231229.models.*;
import com.aliyun.teautil.models.RuntimeOptions; //import com.aliyun.teautil.models.RuntimeOptions;
import com.fasterxml.jackson.databind.ObjectMapper; //import com.fasterxml.jackson.databind.ObjectMapper;
//
import java.io.File; //import java.io.File;
import java.io.FileInputStream; //import java.io.FileInputStream;
import java.io.InputStream; //import java.io.InputStream;
import java.net.HttpURLConnection; //import java.net.HttpURLConnection;
import java.net.URL; //import java.net.URL;
import java.nio.file.Paths; //import java.nio.file.Paths;
import java.security.MessageDigest; //import java.security.MessageDigest;
import java.util.*; //import java.util.*;
//
import org.springframework.web.multipart.MultipartFile; //import org.springframework.web.multipart.MultipartFile;
//
/** ///**
* 知识库上传工具类 // * 知识库上传工具类
* @author GIIT-YC // * @author GIIT-YC
* // *
*/ // */
public class KnowledgeBaseUploader { //public class KnowledgeBaseUploader {
//
/**
 * Uploads a multipart file and adds it to a knowledge base.
 *
 * <p>The steps are: request an upload lease in the {@code default} category, send the file
 * through {@code uploadFile}, register it with the {@code DASHSCOPE_DOCMIND} parser, call
 * {@code waitForFileParsing}, submit the index job and call {@code waitForIndexJob}. The
 * helper methods are defined elsewhere in this class.
 *
 * @param client      Alibaba Cloud Bailian client
 * @param workspaceId workspace id
 * @param indexId     knowledge base id
 * @param file        uploaded file
 * @return the file id of the new document, or {@code null} when any step throws
 */
public static String uploadDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, MultipartFile file) {
    try {
        // Document metadata needed by the lease request.
        String originalName = file.getOriginalFilename();
        String md5Hex = calculateMD5(file.getInputStream());
        String byteCount = String.valueOf(file.getSize());

        // Request the upload lease.
        ApplyFileUploadLeaseRequest leaseReq = new ApplyFileUploadLeaseRequest();
        leaseReq.setFileName(originalName);
        leaseReq.setMd5(md5Hex);
        leaseReq.setSizeInBytes(byteCount);
        ApplyFileUploadLeaseResponse lease = client.applyFileUploadLeaseWithOptions(
                "default", workspaceId, leaseReq, new HashMap<>(), new RuntimeOptions());

        String leaseId = lease.getBody().getData().getFileUploadLeaseId();
        String targetUrl = lease.getBody().getData().getParam().getUrl();

        // Turn the lease headers into a map by round-tripping them through JSON, then upload.
        ObjectMapper json = new ObjectMapper();
        String headersAsJson = json.writeValueAsString(lease.getBody().getData().getParam().getHeaders());
        Map<String, String> uploadHeaders = json.readValue(headersAsJson, Map.class);
        uploadFile(targetUrl, uploadHeaders, file);

        // Register the uploaded file in the category.
        AddFileRequest addReq = new AddFileRequest();
        addReq.setLeaseId(leaseId);
        addReq.setParser("DASHSCOPE_DOCMIND");
        addReq.setCategoryId("default");
        AddFileResponse added = client.addFileWithOptions(workspaceId, addReq, new HashMap<>(), new RuntimeOptions());
        String newFileId = added.getBody().getData().getFileId();

        // Block until the file has been parsed.
        waitForFileParsing(client, workspaceId, newFileId);

        // Add the parsed file to the knowledge base.
        SubmitIndexAddDocumentsJobRequest jobReq = new SubmitIndexAddDocumentsJobRequest();
        jobReq.setIndexId(indexId);
        jobReq.setDocumentIds(Collections.singletonList(newFileId));
        jobReq.setSourceType("DATA_CENTER_FILE");
        SubmitIndexAddDocumentsJobResponse submitted = client.submitIndexAddDocumentsJobWithOptions(
                workspaceId, jobReq, new HashMap<>(), new RuntimeOptions());

        // Block until the index job is done.
        String jobId = submitted.getBody().getData().getId();
        waitForIndexJob(client, workspaceId, jobId, indexId);

        return newFileId;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
//
/**
 * Uploads several documents to a knowledge base, one after another.
 *
 * @param client      Alibaba Cloud Bailian client
 * @param workspaceId workspace id
 * @param indexId     knowledge base id
 * @param files       files to upload
 * @return the file ids of the uploads that returned a non-null id, in input order
 */
public static List<String> uploadDocuments(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, MultipartFile[] files) {
    List<String> uploadedIds = new ArrayList<>();
    for (int i = 0; i < files.length; i++) {
        String id = uploadDocument(client, workspaceId, indexId, files[i]);
        if (id == null) {
            // A null id marks a failed upload; it is left out of the result.
            continue;
        }
        uploadedIds.add(id);
    }
    return uploadedIds;
}
//
/**
 * Uploads a local file and adds it to a knowledge base.
 *
 * <p>Steps, in order: apply for an upload lease, PUT the file to the lease URL, register
 * the file (parser {@code DASHSCOPE_DOCMIND}, category {@code default}), wait until parsing
 * finishes, submit an index add-documents job (source type {@code DATA_CENTER_FILE}) and
 * wait until that job finishes.
 *
 * @param client      Alibaba Cloud Bailian client
 * @param workspaceId workspace ID
 * @param indexId     knowledge base (index) ID
 * @param filePath    local path of the document
 * @return the file ID of the new document, or {@code null} if any step fails
 */
public static String uploadDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String filePath) {
    try {
        // Describe the local file for the lease request.
        String fileName = Paths.get(filePath).getFileName().toString();
        String fileMd5 = calculateMD5(filePath);
        String fileSize = String.valueOf(new File(filePath).length());

        // Apply for an upload lease.
        ApplyFileUploadLeaseRequest leaseRequest = new ApplyFileUploadLeaseRequest()
                .setFileName(fileName)
                .setMd5(fileMd5)
                .setSizeInBytes(fileSize);

        // NOTE(review): the first argument ("default") is presumably the category ID - confirm against the SDK.
        ApplyFileUploadLeaseResponse leaseResponse = client.applyFileUploadLeaseWithOptions(
                "default", workspaceId, leaseRequest, new HashMap<>(), new RuntimeOptions());

        String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
        String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();

        // Upload the file. The lease headers are round-tripped through JSON to obtain a Map.
        ObjectMapper mapper = new ObjectMapper();
        @SuppressWarnings("unchecked") // Map.class carries no type arguments; uploadFile only reads String values from it
        Map<String, String> headers = mapper.readValue(
                mapper.writeValueAsString(leaseResponse.getBody().getData().getParam().getHeaders()),
                Map.class);

        uploadFile(uploadUrl, headers, filePath);

        // Register the uploaded file in the category.
        AddFileRequest addRequest = new AddFileRequest()
                .setLeaseId(leaseId)
                .setParser("DASHSCOPE_DOCMIND")
                .setCategoryId("default");

        AddFileResponse addResponse = client.addFileWithOptions(
                workspaceId, addRequest, new HashMap<>(), new RuntimeOptions());

        String fileId = addResponse.getBody().getData().getFileId();

        // Wait for the file to be parsed.
        waitForFileParsing(client, workspaceId, fileId);

        // Add the parsed file to the knowledge base.
        SubmitIndexAddDocumentsJobRequest indexRequest = new SubmitIndexAddDocumentsJobRequest()
                .setIndexId(indexId)
                .setDocumentIds(Collections.singletonList(fileId))
                .setSourceType("DATA_CENTER_FILE");

        SubmitIndexAddDocumentsJobResponse indexResponse = client.submitIndexAddDocumentsJobWithOptions(
                workspaceId, indexRequest, new HashMap<>(), new RuntimeOptions());

        // Wait for indexing to finish.
        waitForIndexJob(client, workspaceId, indexResponse.getBody().getData().getId(), indexId);

        return fileId;

    } catch (InterruptedException e) {
        // The wait helpers sleep between polls; restore the flag so the calling thread
        // can still observe that it was interrupted.
        Thread.currentThread().interrupt();
        e.printStackTrace();
        return null;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
//
/**
 * Computes the MD5 digest of a local file.
 *
 * @param filePath path of the file to hash
 * @return the digest as 32 lowercase hexadecimal characters
 * @throws Exception if the file cannot be opened or read
 */
private static String calculateMD5(String filePath) throws Exception {
    MessageDigest digest = MessageDigest.getInstance("MD5");
    try (FileInputStream in = new FileInputStream(filePath)) {
        byte[] chunk = new byte[4096];
        for (int count = in.read(chunk); count != -1; count = in.read(chunk)) {
            digest.update(chunk, 0, count);
        }
    }
    byte[] raw = digest.digest();
    StringBuilder hex = new StringBuilder(raw.length * 2);
    for (byte value : raw) {
        // High nibble first, then low nibble; forDigit yields lowercase letters.
        hex.append(Character.forDigit((value >> 4) & 0xf, 16));
        hex.append(Character.forDigit(value & 0xf, 16));
    }
    return hex.toString();
}
//
/**
 * PUTs a local file to a pre-signed upload URL.
 *
 * <p>Only the {@code X-bailian-extra} and {@code Content-Type} entries of {@code headers}
 * are forwarded to the request.
 *
 * @param preSignedUrl pre-signed upload URL
 * @param headers      headers supplied with the upload lease
 * @param filePath     local path of the file to send
 * @throws Exception        if the file cannot be read or the request fails at the I/O level
 * @throws RuntimeException if the server answers with any status other than 200
 */
private static void uploadFile(String preSignedUrl, Map<String, String> headers,
        String filePath) throws Exception {
    HttpURLConnection conn = (HttpURLConnection) new URL(preSignedUrl).openConnection();
    try {
        conn.setRequestMethod("PUT");
        conn.setDoOutput(true);
        conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
        conn.setRequestProperty("Content-Type", headers.get("Content-Type"));

        // The file is opened first, so a missing file fails before any request body is started.
        // Both streams are closed on exit; the output stream used to be left open.
        try (FileInputStream fis = new FileInputStream(filePath);
             java.io.OutputStream out = conn.getOutputStream()) {
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = fis.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        }

        int status = conn.getResponseCode();
        if (status != 200) {
            throw new RuntimeException("上传失败: " + status);
        }
    } finally {
        // Release the underlying connection whether or not the upload succeeded.
        conn.disconnect();
    }
}
//
/**
 * Polls the file status every five seconds until parsing succeeds.
 *
 * <p>The wait is capped at 30 minutes. Without a cap, a file that settles in any status
 * other than {@code PARSE_SUCCESS} or {@code PARSE_FAILED} would keep the calling thread
 * polling forever.
 *
 * @param client      Bailian client
 * @param workspaceId workspace ID
 * @param fileId      ID of the file being parsed
 * @throws RuntimeException if the status is {@code PARSE_FAILED} or the cap is reached
 * @throws Exception        if the status query fails or the thread is interrupted while sleeping
 */
private static void waitForFileParsing(com.aliyun.bailian20231229.Client client,
        String workspaceId, String fileId) throws Exception {
    final long pollIntervalMillis = 5000L;
    final long deadline = System.nanoTime() + java.util.concurrent.TimeUnit.MINUTES.toNanos(30);
    while (true) {
        DescribeFileResponse response = client.describeFileWithOptions(
                workspaceId, fileId, new HashMap<>(), new RuntimeOptions());

        String status = response.getBody().getData().getStatus();
        if ("PARSE_SUCCESS".equals(status)) {
            return;
        }
        if ("PARSE_FAILED".equals(status)) {
            throw new RuntimeException("文档解析失败");
        }
        // nanoTime values are compared by difference so the check is safe against wrap-around.
        if (System.nanoTime() - deadline >= 0) {
            throw new RuntimeException("文档解析超时: " + status);
        }
        Thread.sleep(pollIntervalMillis);
    }
}
//
/**
 * Polls an index job every five seconds until it completes.
 *
 * <p>The wait is capped at 30 minutes. Without a cap, a job that never reports
 * {@code COMPLETED} or {@code FAILED} would keep the calling thread polling forever.
 *
 * @param client      Bailian client
 * @param workspaceId workspace ID
 * @param jobId       ID of the index job to watch
 * @param indexId     knowledge base (index) ID the job belongs to
 * @throws RuntimeException if the status is {@code FAILED} or the cap is reached
 * @throws Exception        if the status query fails or the thread is interrupted while sleeping
 */
private static void waitForIndexJob(com.aliyun.bailian20231229.Client client,
        String workspaceId, String jobId, String indexId) throws Exception {
    final long pollIntervalMillis = 5000L;
    final long deadline = System.nanoTime() + java.util.concurrent.TimeUnit.MINUTES.toNanos(30);
    while (true) {
        GetIndexJobStatusRequest request = new GetIndexJobStatusRequest()
                .setIndexId(indexId)
                .setJobId(jobId);

        GetIndexJobStatusResponse response = client.getIndexJobStatusWithOptions(
                workspaceId, request, new HashMap<>(), new RuntimeOptions());

        String status = response.getBody().getData().getStatus();
        if ("COMPLETED".equals(status)) {
            return;
        }
        if ("FAILED".equals(status)) {
            throw new RuntimeException("索引任务失败");
        }
        // nanoTime values are compared by difference so the check is safe against wrap-around.
        if (System.nanoTime() - deadline >= 0) {
            throw new RuntimeException("索引任务超时: " + status);
        }
        Thread.sleep(pollIntervalMillis);
    }
}
//
/**
 * Computes the MD5 digest of everything remaining in a stream.
 *
 * <p>The stream is read to its end but is not closed here; closing stays with the caller.
 *
 * @param inputStream stream to hash
 * @return the digest as 32 lowercase hexadecimal characters
 * @throws Exception if reading from the stream fails
 */
private static String calculateMD5(InputStream inputStream) throws Exception {
    MessageDigest digest = MessageDigest.getInstance("MD5");
    byte[] chunk = new byte[4096];
    for (int count = inputStream.read(chunk); count != -1; count = inputStream.read(chunk)) {
        digest.update(chunk, 0, count);
    }
    byte[] raw = digest.digest();
    StringBuilder hex = new StringBuilder(raw.length * 2);
    for (byte value : raw) {
        // High nibble first, then low nibble; forDigit yields lowercase letters.
        hex.append(Character.forDigit((value >> 4) & 0xf, 16));
        hex.append(Character.forDigit(value & 0xf, 16));
    }
    return hex.toString();
}
//
/**
 * PUTs the content of an uploaded multipart file to a pre-signed upload URL.
 *
 * <p>Only the {@code X-bailian-extra} and {@code Content-Type} entries of {@code headers}
 * are forwarded to the request.
 *
 * @param preSignedUrl pre-signed upload URL
 * @param headers      headers supplied with the upload lease
 * @param file         multipart file whose content is sent
 * @throws Exception        if the content cannot be read or the request fails at the I/O level
 * @throws RuntimeException if the server answers with any status other than 200
 */
private static void uploadFile(String preSignedUrl, Map<String, String> headers,
        MultipartFile file) throws Exception {
    HttpURLConnection conn = (HttpURLConnection) new URL(preSignedUrl).openConnection();
    try {
        conn.setRequestMethod("PUT");
        conn.setDoOutput(true);
        conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
        conn.setRequestProperty("Content-Type", headers.get("Content-Type"));

        // Both streams are closed on exit; the output stream used to be left open.
        try (InputStream inputStream = file.getInputStream();
             java.io.OutputStream out = conn.getOutputStream()) {
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        }

        int status = conn.getResponseCode();
        if (status != 200) {
            throw new RuntimeException("上传失败: " + status);
        }
    } finally {
        // Release the underlying connection whether or not the upload succeeded.
        conn.disconnect();
    }
}
//
/**
 * Builds a Bailian client from an AccessKey pair.
 *
 * @param accessKeyId     AccessKey ID used to sign requests
 * @param accessKeySecret AccessKey secret used to sign requests
 * @return a client configured for the {@code bailian.cn-beijing.aliyuncs.com} endpoint
 * @throws Exception if the SDK client cannot be constructed
 */
public static com.aliyun.bailian20231229.Client createClient(String accessKeyId, String accessKeySecret) throws Exception {
    com.aliyun.teaopenapi.models.Config clientConfig = new com.aliyun.teaopenapi.models.Config();
    clientConfig.setAccessKeyId(accessKeyId);
    clientConfig.setAccessKeySecret(accessKeySecret);
    // This is the public-cloud, public-network endpoint; replace it if another endpoint is needed.
    clientConfig.endpoint = "bailian.cn-beijing.aliyuncs.com";
    return new com.aliyun.bailian20231229.Client(clientConfig);
}
//
/**
 * Manual smoke test: uploads one local document to a fixed knowledge base.
 *
 * <p>Credentials are read from the {@code ALIBABA_CLOUD_ACCESS_KEY_ID} and
 * {@code ALIBABA_CLOUD_ACCESS_KEY_SECRET} environment variables. They used to be string
 * literals in this method; any key that was committed that way should be treated as
 * leaked and rotated.
 *
 * @param args unused
 * @throws IllegalStateException if either credential variable is missing or empty
 * @throws Exception             if the client cannot be created
 */
public static void main(String[] args) throws Exception {
    String accessKeyId = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
    String accessKeySecret = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    if (accessKeyId == null || accessKeyId.isEmpty()
            || accessKeySecret == null || accessKeySecret.isEmpty()) {
        throw new IllegalStateException(
                "ALIBABA_CLOUD_ACCESS_KEY_ID and ALIBABA_CLOUD_ACCESS_KEY_SECRET must be set");
    }

    String workspaceId = "llm-4pf5auwewoz34zqu";
    String indexId = "b9pvwfqp3d";
    String filePath = "D:\\公司经济责任审计方案模板.docx";

    Client client = createClient(accessKeyId, accessKeySecret);

    uploadDocument(client, workspaceId, indexId, filePath);
}
} //}

View File

@@ -1,156 +1,156 @@
package com.gxwebsoft.ai.util; //package com.gxwebsoft.ai.util;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.HashMap; //import java.util.HashMap;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import com.alibaba.fastjson.JSON; //import com.alibaba.fastjson.JSON;
import com.aliyun.bailian20231229.models.CreateIndexResponse; //import com.aliyun.bailian20231229.models.CreateIndexResponse;
import com.aliyun.bailian20231229.models.DeleteFileResponse; //import com.aliyun.bailian20231229.models.DeleteFileResponse;
import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse; //import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
import com.aliyun.bailian20231229.models.DeleteIndexResponse; //import com.aliyun.bailian20231229.models.DeleteIndexResponse;
import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse; //import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse;
import com.aliyun.bailian20231229.models.ListIndicesResponse; //import com.aliyun.bailian20231229.models.ListIndicesResponse;
import com.aliyun.bailian20231229.models.RetrieveRequest; //import com.aliyun.bailian20231229.models.RetrieveRequest;
import com.aliyun.bailian20231229.models.RetrieveResponse; //import com.aliyun.bailian20231229.models.RetrieveResponse;
import com.aliyun.teautil.models.RuntimeOptions; //import com.aliyun.teautil.models.RuntimeOptions;
//
/** ///**
* 知识库工具类 // * 知识库工具类
* @author GIIT-YC // * @author GIIT-YC
* // *
*/ // */
public class KnowledgeBaseUtil { //public class KnowledgeBaseUtil {
//
/** // /**
* 在指定的知识库中检索信息。 // * 在指定的知识库中检索信息。
* // *
* @param client 客户端对象bailian20231229Client // * @param client 客户端对象bailian20231229Client
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @param indexId 知识库ID // * @param indexId 知识库ID
* @param query 检索查询语句 // * @param query 检索查询语句
* @return 阿里云百炼服务的响应 // * @return 阿里云百炼服务的响应
*/ // */
public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query) throws Exception { // public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query) throws Exception {
RetrieveRequest retrieveRequest = new RetrieveRequest(); // RetrieveRequest retrieveRequest = new RetrieveRequest();
retrieveRequest.setIndexId(indexId); // retrieveRequest.setIndexId(indexId);
retrieveRequest.setQuery(query); // retrieveRequest.setQuery(query);
retrieveRequest.setDenseSimilarityTopK(100); // retrieveRequest.setDenseSimilarityTopK(100);
retrieveRequest.setSparseSimilarityTopK(100); // retrieveRequest.setSparseSimilarityTopK(100);
retrieveRequest.setEnableReranking(false);//开启耗费巨量token // retrieveRequest.setEnableReranking(false);//开启耗费巨量token
RuntimeOptions runtime = new RuntimeOptions(); // RuntimeOptions runtime = new RuntimeOptions();
return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime); // return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
} // }
//
public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query, List<String> filesIds) throws Exception { // public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query, List<String> filesIds) throws Exception {
RetrieveRequest retrieveRequest = new RetrieveRequest(); // RetrieveRequest retrieveRequest = new RetrieveRequest();
retrieveRequest.setIndexId(indexId); // retrieveRequest.setIndexId(indexId);
retrieveRequest.setQuery(query); // retrieveRequest.setQuery(query);
retrieveRequest.setDenseSimilarityTopK(100); // retrieveRequest.setDenseSimilarityTopK(100);
retrieveRequest.setSparseSimilarityTopK(100); // retrieveRequest.setSparseSimilarityTopK(100);
retrieveRequest.setEnableReranking(false);//开启耗费巨量token // retrieveRequest.setEnableReranking(false);//开启耗费巨量token
List<Map<String, String>> searchFilters = new ArrayList<>(); // List<Map<String, String>> searchFilters = new ArrayList<>();
Map<String, String> searchFiltersTags = new HashMap<>(); // Map<String, String> searchFiltersTags = new HashMap<>();
searchFiltersTags.put("tags", JSON.toJSONString(filesIds)); // searchFiltersTags.put("tags", JSON.toJSONString(filesIds));
searchFilters.add(searchFiltersTags); // searchFilters.add(searchFiltersTags);
retrieveRequest.setSearchFilters(searchFilters); // retrieveRequest.setSearchFilters(searchFilters);
RuntimeOptions runtime = new RuntimeOptions(); // RuntimeOptions runtime = new RuntimeOptions();
return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime); // return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
} // }
//
/** // /**
* 在阿里云百炼服务中创建知识库(初始化)。 // * 在阿里云百炼服务中创建知识库(初始化)。
* // *
* @param client 客户端对象 // * @param client 客户端对象
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @param name 知识库名称 // * @param name 知识库名称
* @param desc 知识库描述 // * @param desc 知识库描述
* @return 阿里云百炼服务的响应对象 // * @return 阿里云百炼服务的响应对象
*/ // */
public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String name, String desc) throws Exception { // public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String name, String desc) throws Exception {
Map<String, String> headers = new HashMap<>(); // Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest(); // com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest();
createIndexRequest.setStructureType("unstructured"); // createIndexRequest.setStructureType("unstructured");
createIndexRequest.setName(name); // createIndexRequest.setName(name);
createIndexRequest.setDescription(desc); // createIndexRequest.setDescription(desc);
createIndexRequest.setSinkType("DEFAULT"); // createIndexRequest.setSinkType("DEFAULT");
createIndexRequest.setEmbeddingModelName("text-embedding-v4"); // createIndexRequest.setEmbeddingModelName("text-embedding-v4");
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); // com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime); // return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime);
} // }
//
/** // /**
* 获取指定业务空间下一个或多个知识库的详细信息 // * 获取指定业务空间下一个或多个知识库的详细信息
* // *
* @param client 客户端Client // * @param client 客户端Client
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应 // * @return 阿里云百炼服务的响应
*/ // */
public static ListIndicesResponse listIndices(com.aliyun.bailian20231229.Client client, String workspaceId) throws Exception { // public static ListIndicesResponse listIndices(com.aliyun.bailian20231229.Client client, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>(); // Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.ListIndicesRequest listIndicesRequest = new com.aliyun.bailian20231229.models.ListIndicesRequest(); // com.aliyun.bailian20231229.models.ListIndicesRequest listIndicesRequest = new com.aliyun.bailian20231229.models.ListIndicesRequest();
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); // com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.listIndicesWithOptions(workspaceId, listIndicesRequest, headers, runtime); // return client.listIndicesWithOptions(workspaceId, listIndicesRequest, headers, runtime);
} // }
//
/** // /**
* 永久性删除指定的知识库 // * 永久性删除指定的知识库
* // *
* @param client 客户端Client // * @param client 客户端Client
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @param indexId 知识库ID // * @param indexId 知识库ID
* @return 阿里云百炼服务的响应 // * @return 阿里云百炼服务的响应
*/ // */
public static DeleteIndexResponse deleteIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId) throws Exception { // public static DeleteIndexResponse deleteIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId) throws Exception {
Map<String, String> headers = new HashMap<>(); // Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.DeleteIndexRequest deleteIndexRequest = new com.aliyun.bailian20231229.models.DeleteIndexRequest(); // com.aliyun.bailian20231229.models.DeleteIndexRequest deleteIndexRequest = new com.aliyun.bailian20231229.models.DeleteIndexRequest();
deleteIndexRequest.setIndexId(indexId); // deleteIndexRequest.setIndexId(indexId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); // com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.deleteIndexWithOptions(workspaceId, deleteIndexRequest, headers, runtime); // return client.deleteIndexWithOptions(workspaceId, deleteIndexRequest, headers, runtime);
} // }
//
/** // /**
* 查询知识库下的文档列表 // * 查询知识库下的文档列表
* // *
* @param client 客户端Client // * @param client 客户端Client
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @param indexId 知识库ID // * @param indexId 知识库ID
* @return 阿里云百炼服务的响应 // * @return 阿里云百炼服务的响应
*/ // */
public static ListIndexDocumentsResponse listIndexDocuments(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, Integer pageSize, Integer pageNumber) throws Exception { // public static ListIndexDocumentsResponse listIndexDocuments(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, Integer pageSize, Integer pageNumber) throws Exception {
com.aliyun.bailian20231229.models.ListIndexDocumentsRequest listIndexDocumentsRequest = new com.aliyun.bailian20231229.models.ListIndexDocumentsRequest(); // com.aliyun.bailian20231229.models.ListIndexDocumentsRequest listIndexDocumentsRequest = new com.aliyun.bailian20231229.models.ListIndexDocumentsRequest();
listIndexDocumentsRequest.setIndexId(indexId); // listIndexDocumentsRequest.setIndexId(indexId);
listIndexDocumentsRequest.setPageSize(pageSize); // listIndexDocumentsRequest.setPageSize(pageSize);
listIndexDocumentsRequest.setPageNumber(pageNumber); // listIndexDocumentsRequest.setPageNumber(pageNumber);
return client.listIndexDocuments(workspaceId, listIndexDocumentsRequest); // return client.listIndexDocuments(workspaceId, listIndexDocumentsRequest);
} // }
//
/** // /**
* 删除知识库下的文档 // * 删除知识库下的文档
* // *
* @param client 客户端Client // * @param client 客户端Client
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @param indexId 知识库ID // * @param indexId 知识库ID
* @param ids 删除文件ID列表 // * @param ids 删除文件ID列表
* @return 阿里云百炼服务的响应 // * @return 阿里云百炼服务的响应
*/ // */
public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, List<String> ids) throws Exception { // public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, List<String> ids) throws Exception {
com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest deleteIndexDocumentRequest = new com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest(); // com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest deleteIndexDocumentRequest = new com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest();
deleteIndexDocumentRequest.setIndexId(indexId); // deleteIndexDocumentRequest.setIndexId(indexId);
deleteIndexDocumentRequest.setDocumentIds(ids); // deleteIndexDocumentRequest.setDocumentIds(ids);
return client.deleteIndexDocument(workspaceId, deleteIndexDocumentRequest); // return client.deleteIndexDocument(workspaceId, deleteIndexDocumentRequest);
} // }
//
/** // /**
* 删除阿里云应用数据文档 // * 删除阿里云应用数据文档
* // *
* @param client 客户端Client // * @param client 客户端Client
* @param workspaceId 业务空间ID // * @param workspaceId 业务空间ID
* @param fileId 删除文件ID // * @param fileId 删除文件ID
* @return 阿里云百炼服务的响应 // * @return 阿里云百炼服务的响应
*/ // */
public static DeleteFileResponse deleteAppDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId) throws Exception { // public static DeleteFileResponse deleteAppDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId) throws Exception {
return client.deleteFile(fileId, workspaceId); // return client.deleteFile(fileId, workspaceId);
} // }
} //}