切换知识库工具栏

This commit is contained in:
2026-01-22 10:34:19 +08:00
parent 9689ce9e23
commit bdf4cde39f
10 changed files with 1723 additions and 1693 deletions

View File

@@ -14,7 +14,7 @@ import com.gxwebsoft.ai.config.KnowledgeBaseConfig;
import com.gxwebsoft.ai.entity.AiCloudFile;
import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory;
import com.gxwebsoft.ai.service.AiCloudFileService;
import com.gxwebsoft.ai.util.KnowledgeBaseUtil;
import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil;
import com.gxwebsoft.common.core.context.TenantContext;
import cn.hutool.core.util.StrUtil;
@@ -146,7 +146,7 @@ public abstract class AbstractAuditContentService {
Client client = clientFactory.createClient();
for (String query : queries) {
try {
RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query);
RetrieveResponse resp = AiCloudKnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query);
List<RetrieveResponseBodyDataNodes> nodes = Optional.ofNullable(resp)
.map(RetrieveResponse::getBody)
.map(RetrieveResponseBody::getData)

View File

@@ -9,7 +9,7 @@ import com.alibaba.fastjson.JSONObject;
import com.gxwebsoft.ai.config.KnowledgeBaseConfig;
import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory;
import com.gxwebsoft.ai.service.AuditReportService;
import com.gxwebsoft.ai.util.KnowledgeBaseUtil;
import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil;
import com.gxwebsoft.pwl.entity.PwlProjectLibrary;
import com.gxwebsoft.pwl.service.PwlProjectLibraryService;
@@ -319,7 +319,7 @@ public class AuditReportServiceImpl implements AuditReportService {
try {
Client client = clientFactory.createClient();
RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query);
RetrieveResponse resp = AiCloudKnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, query);
if (resp.getBody() != null && resp.getBody().getData() != null
&& resp.getBody().getData().getNodes() != null) {

View File

@@ -1,207 +1,207 @@
package com.gxwebsoft.ai.service.impl;
import com.aliyun.bailian20231229.Client;
import com.aliyun.bailian20231229.models.CreateIndexResponse;
import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
import com.aliyun.bailian20231229.models.DeleteIndexResponse;
import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse;
import com.aliyun.bailian20231229.models.ListIndicesResponse;
import com.aliyun.bailian20231229.models.RetrieveResponse;
import com.aliyun.bailian20231229.models.RetrieveResponseBody.RetrieveResponseBodyDataNodes;
import com.gxwebsoft.ai.config.KnowledgeBaseConfig;
import com.gxwebsoft.ai.constants.KnowledgeBaseConstants;
import com.gxwebsoft.ai.dto.KnowledgeBaseRequest;
import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory;
import com.gxwebsoft.ai.service.KnowledgeBaseService;
import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil;
import com.gxwebsoft.ai.util.KnowledgeBaseUploader;
import com.gxwebsoft.ai.util.KnowledgeBaseUtil;
import cn.hutool.core.util.StrUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * {@link KnowledgeBaseService} implementation that talks to the Alibaba Cloud Bailian SDK.
 *
 * <p>Each operation reads the workspace ID from {@link KnowledgeBaseConfig}, obtains a
 * {@link Client} from {@link KnowledgeBaseClientFactory} and delegates the remote call to
 * {@link KnowledgeBaseUtil}, {@link AiCloudKnowledgeBaseUtil} or {@link KnowledgeBaseUploader}.
 * Any exception raised while creating the client or calling the SDK is rethrown as a
 * {@link RuntimeException} that keeps the original exception as its cause.
 */
@Service
public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {

    /** Timestamp pattern used for temporary knowledge base names; the formatter is immutable. */
    private static final DateTimeFormatter TEMP_CODE_FORMATTER = DateTimeFormatter.ofPattern("MMddHHmmssSSS");

    /** Result limit applied when the caller does not pass a topK value. */
    private static final int DEFAULT_TOP_K = 10;

    @Autowired
    private KnowledgeBaseConfig config;

    @Autowired
    private KnowledgeBaseClientFactory clientFactory;

    /**
     * Unpacks the request object and delegates to
     * {@link #queryKnowledgeBase(String, String, Integer, Integer)}.
     *
     * @param req request carrying kbId, query, topK and formCommit
     * @return distinct text chunks in retrieval order
     */
    @Override
    public Set<String> queryKnowledgeBase(KnowledgeBaseRequest req) {
        return queryKnowledgeBase(req.getKbId(), req.getQuery(), req.getTopK(), req.getFormCommit());
    }

    /**
     * Retrieves text chunks from a knowledge base.
     *
     * @param kbId       knowledge base (index) ID
     * @param query      query text; when empty, the entry of
     *                   {@code KnowledgeBaseConstants.KEY_WORDS} at index {@code formCommit} is used
     * @param topK       maximum number of distinct chunks to return; {@code null} means 10
     * @param formCommit index into the predefined keywords, only consulted when {@code query} is empty
     * @return distinct text chunks in retrieval order; empty when the service returns no nodes
     * @throws IllegalArgumentException if {@code query} is empty and {@code formCommit} is
     *                                  {@code null} or outside the keyword list
     * @throws RuntimeException         if client creation or the retrieve call fails
     */
    @Override
    public Set<String> queryKnowledgeBase(String kbId, String query, Integer topK, Integer formCommit) {
        Set<String> result = new LinkedHashSet<>();
        String workspaceId = config.getWorkspaceId();
        List<String> keyWords = Arrays.asList(KnowledgeBaseConstants.KEY_WORDS);

        String searchQuery = query;
        if (StrUtil.isEmpty(query)) {
            // Fail with a descriptive error instead of a bare NPE / index error from unboxing.
            if (formCommit == null || formCommit < 0 || formCommit >= keyWords.size()) {
                throw new IllegalArgumentException("查询知识库失败: 未提供查询内容且formCommit无效: " + formCommit);
            }
            searchQuery = keyWords.get(formCommit);
        }
        int limit = topK == null ? DEFAULT_TOP_K : topK;

        try {
            Client client = clientFactory.createClient();
            RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, kbId, searchQuery);
            // A response without body/data/nodes means "nothing found", not a failure.
            if (resp == null || resp.getBody() == null || resp.getBody().getData() == null
                    || resp.getBody().getData().getNodes() == null) {
                return result;
            }
            for (RetrieveResponseBodyDataNodes node : resp.getBody().getData().getNodes()) {
                result.add(node.getText());
                // The limit counts distinct texts because the result is a set.
                if (result.size() >= limit) {
                    break;
                }
            }
        } catch (Exception e) {
            throw new RuntimeException("查询知识库失败: " + e.getMessage(), e);
        }
        return result;
    }

    /**
     * Returns the ID of the knowledge base named {@code companyCode}, creating it first when
     * no knowledge base with that name exists.
     *
     * @param companyName passed to {@code KnowledgeBaseUtil.createIndex} as its last argument
     * @param companyCode knowledge base name used for the existence lookup and for creation
     * @return the existing or newly created knowledge base ID
     * @throws RuntimeException if the lookup or the creation fails
     */
    @Override
    public String createKnowledgeBase(String companyName, String companyCode) {
        String workspaceId = config.getWorkspaceId();
        try {
            String existingId = getKnowledgeBaseIdByName(companyCode);
            if (StrUtil.isNotEmpty(existingId)) {
                return existingId;
            }

            Client client = clientFactory.createClient();
            CreateIndexResponse created = KnowledgeBaseUtil.createIndex(client, workspaceId, companyCode, companyName);
            return created.getBody().getData().getId();
        } catch (Exception e) {
            throw new RuntimeException("创建知识库失败: " + e.getMessage(), e);
        }
    }

    /**
     * Creates a knowledge base whose name and code are both {@code "Temp_"} followed by the
     * current local time formatted as {@code MMddHHmmssSSS}.
     *
     * @return the knowledge base ID
     */
    @Override
    public String createKnowledgeBaseTemp() {
        String code = "Temp_" + LocalDateTime.now().format(TEMP_CODE_FORMATTER);
        return createKnowledgeBase(code, code);
    }

    /**
     * Checks whether the workspace contains a knowledge base named {@code companyCode}.
     *
     * @param companyCode knowledge base name to look for
     * @return {@code true} if a knowledge base with exactly that name is listed
     * @throws RuntimeException if listing the knowledge bases fails
     */
    @Override
    public boolean existsKnowledgeBase(String companyCode) {
        String workspaceId = config.getWorkspaceId();
        try {
            Client client = clientFactory.createClient();
            ListIndicesResponse listed = KnowledgeBaseUtil.listIndices(client, workspaceId);

            return listed.getBody().getData().getIndices().stream()
                    .anyMatch(index -> companyCode.equals(index.getName()));
        } catch (Exception e) {
            throw new RuntimeException("检查知识库是否存在失败: " + e.getMessage(), e);
        }
    }

    /**
     * Looks up the ID of the first listed knowledge base named {@code companyCode}.
     *
     * @param companyCode knowledge base name to look for
     * @return the knowledge base ID, or an empty string when no name matches
     * @throws RuntimeException if listing the knowledge bases fails
     */
    @Override
    public String getKnowledgeBaseIdByName(String companyCode) {
        String workspaceId = config.getWorkspaceId();
        try {
            Client client = clientFactory.createClient();
            ListIndicesResponse listed = KnowledgeBaseUtil.listIndices(client, workspaceId);

            return listed.getBody().getData().getIndices().stream()
                    .filter(index -> companyCode.equals(index.getName()))
                    .findFirst()
                    .map(index -> index.getId())
                    .orElse("");
        } catch (Exception e) {
            throw new RuntimeException("查找知识库ID失败: " + e.getMessage(), e);
        }
    }

    /**
     * Lists one page of documents of a knowledge base.
     *
     * @param kbId       knowledge base ID
     * @param pageSize   page size forwarded to the SDK helper
     * @param pageNumber page number forwarded to the SDK helper
     * @return map with key {@code "data"} (the documents) and key {@code "total"} (the total count)
     * @throws RuntimeException if the listing fails
     */
    @Override
    public Map<String, Object> listDocuments(String kbId, Integer pageSize, Integer pageNumber) {
        Map<String, Object> ret = new HashMap<>();
        String workspaceId = config.getWorkspaceId();
        try {
            Client client = clientFactory.createClient();
            ListIndexDocumentsResponse page = KnowledgeBaseUtil.listIndexDocuments(client, workspaceId, kbId, pageSize, pageNumber);
            ret.put("data", page.getBody().getData().getDocuments());
            ret.put("total", page.getBody().getData().getTotalCount());
        } catch (Exception e) {
            throw new RuntimeException("查询知识库下的文档列表失败: " + e.getMessage(), e);
        }
        return ret;
    }

    /**
     * Deletes a knowledge base.
     *
     * @param kbId knowledge base ID
     * @return the success flag reported in the response body
     * @throws RuntimeException if the deletion call fails
     */
    @Override
    public boolean deleteIndex(String kbId) {
        String workspaceId = config.getWorkspaceId();
        try {
            Client client = clientFactory.createClient();
            DeleteIndexResponse deleted = KnowledgeBaseUtil.deleteIndex(client, workspaceId, kbId);
            return deleted.getBody().getSuccess();
        } catch (Exception e) {
            throw new RuntimeException("删除知识库失败: " + e.getMessage(), e);
        }
    }

    /**
     * Deletes documents from a knowledge base.
     *
     * @param kbId    knowledge base ID
     * @param fileIds comma-separated document IDs; entries are trimmed before use
     * @return the success flag reported in the response body
     * @throws RuntimeException if the deletion call fails
     */
    @Override
    public boolean deleteIndexDocument(String kbId, String fileIds) {
        String workspaceId = config.getWorkspaceId();
        List<String> ids = StrUtil.splitTrim(fileIds, ",");
        try {
            Client client = clientFactory.createClient();
            DeleteIndexDocumentResponse deleted = KnowledgeBaseUtil.deleteIndexDocument(client, workspaceId, kbId, ids);
            return deleted.getBody().getSuccess();
        } catch (Exception e) {
            throw new RuntimeException("删除知识库下的文档失败: " + e.getMessage(), e);
        }
    }

    /**
     * Uploads files into a knowledge base and then deletes the uploaded source documents.
     *
     * @param kbId  knowledge base ID
     * @param files files to upload
     * @return {@code true} only if at least one file ID came back and the number of returned
     *         file IDs equals the number of files passed in
     * @throws RuntimeException if the upload or the clean-up fails
     */
    @Override
    public boolean uploadDocuments(String kbId, MultipartFile[] files) {
        String workspaceId = config.getWorkspaceId();
        int count = files.length;
        try {
            Client client = clientFactory.createClient();
            List<String> fileIds = KnowledgeBaseUploader.uploadDocuments(client, workspaceId, kbId, files);
            // Once upload and chunking are done, delete the original documents to free cloud storage.
            for (String fileId : fileIds) {
                KnowledgeBaseUtil.deleteAppDocument(client, workspaceId, fileId);
            }
            return !fileIds.isEmpty() && fileIds.size() == count;
        } catch (Exception e) {
            throw new RuntimeException("上传文档到知识库失败: " + e.getMessage(), e);
        }
    }

    /**
     * Submits a job that adds a single document to a knowledge base.
     *
     * <p>NOTE(review): the method is annotated {@code @Async}; presumably a failure is reported
     * to the async executor rather than to the caller - confirm against the async configuration.
     *
     * @param kbId   knowledge base ID
     * @param fileId ID of the document to add
     * @throws RuntimeException if the submission fails
     */
    @Async
    @Override
    public void submitDocuments(String kbId, String fileId) {
        String workspaceId = config.getWorkspaceId();
        try {
            Client client = clientFactory.createClient();
            AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileId);
        } catch (Exception e) {
            throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e);
        }
    }

    /**
     * Submits a job that adds several documents to a knowledge base.
     *
     * @param kbId    knowledge base ID
     * @param fileIds IDs of the documents to add
     * @throws RuntimeException if the submission fails
     */
    @Override
    public void submitDocuments(String kbId, List<String> fileIds) {
        String workspaceId = config.getWorkspaceId();
        try {
            Client client = clientFactory.createClient();
            AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileIds);
        } catch (Exception e) {
            throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e);
        }
    }
}
//package com.gxwebsoft.ai.service.impl;
//
//import com.aliyun.bailian20231229.Client;
//import com.aliyun.bailian20231229.models.CreateIndexResponse;
//import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
//import com.aliyun.bailian20231229.models.DeleteIndexResponse;
//import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse;
//import com.aliyun.bailian20231229.models.ListIndicesResponse;
//import com.aliyun.bailian20231229.models.RetrieveResponse;
//import com.aliyun.bailian20231229.models.RetrieveResponseBody.RetrieveResponseBodyDataNodes;
//import com.gxwebsoft.ai.config.KnowledgeBaseConfig;
//import com.gxwebsoft.ai.constants.KnowledgeBaseConstants;
//import com.gxwebsoft.ai.dto.KnowledgeBaseRequest;
//import com.gxwebsoft.ai.factory.KnowledgeBaseClientFactory;
//import com.gxwebsoft.ai.service.KnowledgeBaseService;
//import com.gxwebsoft.ai.util.AiCloudKnowledgeBaseUtil;
//import com.gxwebsoft.ai.util.KnowledgeBaseUploader;
//import com.gxwebsoft.ai.util.KnowledgeBaseUtil;
//import cn.hutool.core.util.StrUtil;
//import org.springframework.beans.factory.annotation.Autowired;
//import org.springframework.scheduling.annotation.Async;
//import org.springframework.stereotype.Service;
//import org.springframework.web.multipart.MultipartFile;
//
//import java.time.LocalDateTime;
//import java.time.format.DateTimeFormatter;
//import java.util.Arrays;
//import java.util.HashMap;
//import java.util.LinkedHashSet;
//import java.util.List;
//import java.util.Map;
//import java.util.Set;
//
//@Service
//public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
//
// @Autowired
// private KnowledgeBaseConfig config;
//
// @Autowired
// private KnowledgeBaseClientFactory clientFactory;
//
// @Override
// public Set<String> queryKnowledgeBase(KnowledgeBaseRequest req) {
// return queryKnowledgeBase(req.getKbId(), req.getQuery(), req.getTopK(), req.getFormCommit());
// }
//
// @Override
// public Set<String> queryKnowledgeBase(String kbId, String query, Integer topK, Integer formCommit) {
// Set<String> result = new LinkedHashSet<>();
// String workspaceId = config.getWorkspaceId();
// List<String> keyWords = Arrays.asList(KnowledgeBaseConstants.KEY_WORDS);
// String indexId = kbId;
// String searchQuery = StrUtil.isEmpty(query) ? keyWords.get(formCommit) : query;
// Integer searchTopK = topK == null ? 10 : topK;
//
// try {
// Client client = clientFactory.createClient();
// RetrieveResponse resp = KnowledgeBaseUtil.retrieveIndex(client, workspaceId, indexId, searchQuery);
// for (RetrieveResponseBodyDataNodes node : resp.getBody().getData().getNodes()) {
// result.add(node.getText());
// if (result.size() >= searchTopK) {
// break;
// }
// }
// } catch (Exception e) {
// throw new RuntimeException("查询知识库失败: " + e.getMessage(), e);
// }
// return result;
// }
//
// @Override
// public String createKnowledgeBase(String companyName, String companyCode) {
// String workspaceId = config.getWorkspaceId();
// try {
// String kbId = getKnowledgeBaseIdByName(companyCode);
// if(StrUtil.isNotEmpty(kbId)) {
// return kbId;
// }
//
// Client client = clientFactory.createClient();
// CreateIndexResponse indexResponse = KnowledgeBaseUtil.createIndex(client, workspaceId, companyCode, companyName);
// return indexResponse.getBody().getData().getId();
// } catch (Exception e) {
// throw new RuntimeException("创建知识库失败: " + e.getMessage(), e);
// }
// }
//
// @Override
// public String createKnowledgeBaseTemp() {
// String code = "Temp_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("MMddHHmmssSSS"));
// return createKnowledgeBase(code, code);
// }
//
// @Override
// public boolean existsKnowledgeBase(String companyCode) {
// String workspaceId = config.getWorkspaceId();
// try {
// Client client = clientFactory.createClient();
// ListIndicesResponse indicesResponse = KnowledgeBaseUtil.listIndices(client, workspaceId);
//
// return indicesResponse.getBody().getData().getIndices().stream()
// .anyMatch(index -> companyCode.equals(index.getName()));
// } catch (Exception e) {
// throw new RuntimeException("检查知识库是否存在失败: " + e.getMessage(), e);
// }
// }
//
// @Override
// public String getKnowledgeBaseIdByName(String companyCode) {
// String workspaceId = config.getWorkspaceId();
// try {
// Client client = clientFactory.createClient();
// ListIndicesResponse indicesResponse = KnowledgeBaseUtil.listIndices(client, workspaceId);
//
// return indicesResponse.getBody().getData().getIndices().stream()
// .filter(index -> companyCode.equals(index.getName()))
// .findFirst()
// .map(index -> index.getId())
// .orElse("");
// } catch (Exception e) {
// throw new RuntimeException("查找知识库ID失败: " + e.getMessage(), e);
// }
// }
//
// @Override
// public Map<String,Object> listDocuments(String kbId, Integer pageSize, Integer pageNumber) {
// Map<String,Object> ret = new HashMap<>();
// String workspaceId = config.getWorkspaceId();
// try {
// Client client = clientFactory.createClient();
// ListIndexDocumentsResponse indexDocumentsResponse = KnowledgeBaseUtil.listIndexDocuments(client, workspaceId, kbId, pageSize, pageNumber);
// ret.put("data", indexDocumentsResponse.getBody().getData().getDocuments());
// ret.put("total", indexDocumentsResponse.getBody().getData().getTotalCount());
// } catch (Exception e) {
// throw new RuntimeException("查询知识库下的文档列表失败: " + e.getMessage(), e);
// }
// return ret;
// }
//
// @Override
// public boolean deleteIndex(String kbId) {
// String workspaceId = config.getWorkspaceId();
// try {
// Client client = clientFactory.createClient();
// DeleteIndexResponse indexDocumentResponse = KnowledgeBaseUtil.deleteIndex(client, workspaceId, kbId);
// return indexDocumentResponse.getBody().getSuccess();
// } catch (Exception e) {
// throw new RuntimeException("删除知识库失败: " + e.getMessage(), e);
// }
// }
//
// @Override
// public boolean deleteIndexDocument(String kbId, String fileIds) {
// String workspaceId = config.getWorkspaceId();
// List<String> ids = StrUtil.splitTrim(fileIds, ",");
// try {
// Client client = clientFactory.createClient();
// DeleteIndexDocumentResponse indexDocumentResponse = KnowledgeBaseUtil.deleteIndexDocument(client, workspaceId, kbId, ids);
// return indexDocumentResponse.getBody().getSuccess();
// } catch (Exception e) {
// throw new RuntimeException("删除知识库下的文档失败: " + e.getMessage(), e);
// }
// }
//
// @Override
// public boolean uploadDocuments(String kbId, MultipartFile[] files) {
// String workspaceId = config.getWorkspaceId();
// int count = files.length;
// try {
// Client client = clientFactory.createClient();
// List<String> fileIds = KnowledgeBaseUploader.uploadDocuments(client, workspaceId, kbId, files);
// //上传切片完成后删除原文档(释放云空间)
// for(String fileId : fileIds) {
// KnowledgeBaseUtil.deleteAppDocument(client, workspaceId, fileId);
// }
// return !fileIds.isEmpty() && fileIds.size() == count;
// } catch (Exception e) {
// throw new RuntimeException("上传文档到知识库失败: " + e.getMessage(), e);
// }
// }
//
// @Async
// @Override
// public void submitDocuments(String kbId, String fileId) {
// String workspaceId = config.getWorkspaceId();
// try {
// Client client = clientFactory.createClient();
// AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileId);
// } catch (Exception e) {
// throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e);
// }
// }
//
// @Override
// public void submitDocuments(String kbId, List<String> fileIds) {
// String workspaceId = config.getWorkspaceId();
// try {
// Client client = clientFactory.createClient();
// AiCloudKnowledgeBaseUtil.submitIndexAddDocumentsJob(client, workspaceId, kbId, fileIds);
// } catch (Exception e) {
// throw new RuntimeException("添加文档到知识库失败: " + e.getMessage(), e);
// }
// }
//
//
//}

View File

@@ -1,10 +1,12 @@
package com.gxwebsoft.ai.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSON;
import com.aliyun.bailian20231229.models.CreateIndexResponse;
import com.aliyun.bailian20231229.models.DeleteFileResponse;
import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
@@ -37,7 +39,35 @@ public class AiCloudKnowledgeBaseUtil {
RetrieveRequest retrieveRequest = new RetrieveRequest();
retrieveRequest.setIndexId(indexId);
retrieveRequest.setQuery(query);
retrieveRequest.setDenseSimilarityTopK(null);
retrieveRequest.setDenseSimilarityTopK(100);
retrieveRequest.setSparseSimilarityTopK(100);
retrieveRequest.setEnableReranking(false);//开启耗费巨量token
RuntimeOptions runtime = new RuntimeOptions();
return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
}
/**
 * Retrieves information from the given knowledge base, sending a search filter built from
 * the supplied file IDs.
 *
 * @param client      Bailian SDK client
 * @param workspaceId workspace ID
 * @param indexId     knowledge base ID
 * @param query       retrieval query text
 * @param filesIds    file IDs; serialized to a JSON string and sent under the {@code tags} filter key
 * @return the response of the Alibaba Cloud Bailian service
 * @throws Exception whatever the SDK call throws
 */
public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query, List<String> filesIds) throws Exception {
    // Single-entry filter list: {"tags": "<JSON array of file IDs>"}.
    Map<String, String> tagFilter = new HashMap<>();
    tagFilter.put("tags", JSON.toJSONString(filesIds));
    List<Map<String, String>> filters = new ArrayList<>();
    filters.add(tagFilter);

    RetrieveRequest request = new RetrieveRequest();
    request.setIndexId(indexId);
    request.setQuery(query);
    request.setDenseSimilarityTopK(100);
    request.setSparseSimilarityTopK(100);
    // Reranking stays off: enabling it consumes a very large number of tokens.
    request.setEnableReranking(false);
    request.setSearchFilters(filters);

    return client.retrieveWithOptions(workspaceId, request, null, new RuntimeOptions());
}

View File

@@ -1,384 +1,384 @@
package com.gxwebsoft.ai.util;
import com.aliyun.bailian20231229.models.*;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.FileInputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.MessageDigest;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
* 创建知识库
* @author GIIT-YC
*
*/
public class KnowledgeBaseCreate {
String ALIBABA_CLOUD_ACCESS_KEY_ID = "LTAI5tD5YRKuxWz6Eg7qrM4P";
String ALIBABA_CLOUD_ACCESS_KEY_SECRET = "bO8TBDXflOwbtSKimPpG8XrJnyzgTk";
String WORKSPACE_ID = "llm-4pf5auwewoz34zqu";
/**
* 检查并提示设置必要的环境变量。
*
* @return true 如果所有必需的环境变量都已设置,否则 false
*/
public static boolean checkEnvironmentVariables() {
Map<String, String> requiredVars = new HashMap<>();
requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");
List<String> missingVars = new ArrayList<>();
for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
String value = System.getenv(entry.getKey());
if (value == null || value.isEmpty()) {
missingVars.add(entry.getKey());
System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
}
}
return missingVars.isEmpty();
}
/**
* 计算文档的MD5值。
*
* @param filePath 文档本地路径
* @return 文档的MD5值
* @throws Exception 如果计算过程中发生错误
*/
public static String calculateMD5(String filePath) throws Exception {
MessageDigest md = MessageDigest.getInstance("MD5");
try (FileInputStream fis = new FileInputStream(filePath)) {
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
md.update(buffer, 0, bytesRead);
}
}
StringBuilder sb = new StringBuilder();
for (byte b : md.digest()) {
sb.append(String.format("%02x", b & 0xff));
}
return sb.toString();
}
/**
* 获取文档大小(以字节为单位)。
*
* @param filePath 文档本地路径
* @return 文档大小(以字节为单位)
*/
public static String getFileSize(String filePath) {
File file = new File(filePath);
long fileSize = file.length();
return String.valueOf(fileSize);
}
/**
* 初始化客户端Client
*
* @return 配置好的客户端对象
*/
public static com.aliyun.bailian20231229.Client createClient() throws Exception {
com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
.setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"))
.setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));
// 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。
config.endpoint = "bailian.cn-beijing.aliyuncs.com";
return new com.aliyun.bailian20231229.Client(config);
}
/**
* 申请文档上传租约。
*
* @param client 客户端对象
* @param categoryId 类目ID
* @param fileName 文档名称
* @param fileMd5 文档的MD5值
* @param fileSize 文档大小(以字节为单位)
* @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象
*/
public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId,
String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
applyFileUploadLeaseRequest.setFileName(fileName);
applyFileUploadLeaseRequest.setMd5(fileMd5);
applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null;
applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId,
applyFileUploadLeaseRequest, headers, runtime);
return applyFileUploadLeaseResponse;
}
/**
* 上传文档到临时存储。
*
* @param preSignedUrl 上传租约中的 URL
* @param headers 上传请求的头部
* @param filePath 文档本地路径
* @throws Exception 如果上传过程中发生错误
*/
public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception {
File file = new File(filePath);
if (!file.exists() || !file.isFile()) {
throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
}
try (FileInputStream fis = new FileInputStream(file)) {
URL url = new URL(preSignedUrl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("PUT");
conn.setDoOutput(true);
// 设置上传请求头
conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
// 分块读取并上传文档
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
conn.getOutputStream().write(buffer, 0, bytesRead);
}
int responseCode = conn.getResponseCode();
if (responseCode != 200) {
throw new RuntimeException("上传失败: " + responseCode);
}
}
}
/**
* 将文档添加到类目中。
*
* @param client 客户端对象
* @param leaseId 租约ID
* @param parser 用于文档的解析器
* @param categoryId 类目ID
* @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象
*/
public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser,
String categoryId, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
addFileRequest.setLeaseId(leaseId);
addFileRequest.setParser(parser);
addFileRequest.setCategoryId(categoryId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
}
/**
* 查询文档的基本信息。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param fileId 文档ID
* @return 阿里云百炼服务的响应对象
*/
public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId,
String fileId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.describeFileWithOptions(workspaceId, fileId, headers, runtime);
}
/**
* 在阿里云百炼服务中创建知识库(初始化)。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param fileId 文档ID
* @param name 知识库名称
* @param structureType 知识库的数据类型
* @param sourceType 应用数据的数据类型,支持类目类型和文档类型
* @param sinkType 知识库的向量存储类型
* @return 阿里云百炼服务的响应对象
*/
public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
String fileId, String name, String structureType, String sourceType, String sinkType) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest();
createIndexRequest.setStructureType(structureType);
createIndexRequest.setName(name);
createIndexRequest.setSourceType(sourceType);
createIndexRequest.setSinkType(sinkType);
createIndexRequest.setDocumentIds(Collections.singletonList(fileId));
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime);
}
/**
* 向阿里云百炼服务提交索引任务。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @return 阿里云百炼服务的响应对象
*/
public static SubmitIndexJobResponse submitIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
String indexId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.SubmitIndexJobRequest submitIndexJobRequest = new com.aliyun.bailian20231229.models.SubmitIndexJobRequest();
submitIndexJobRequest.setIndexId(indexId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.submitIndexJobWithOptions(workspaceId, submitIndexJobRequest, headers, runtime);
}
/**
* 查询索引任务状态。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param jobId 任务ID
* @param indexId 知识库ID
* @return 阿里云百炼服务的响应对象
*/
public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client,
String workspaceId, String jobId, String indexId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest();
getIndexJobStatusRequest.setIndexId(indexId);
getIndexJobStatusRequest.setJobId(jobId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
GetIndexJobStatusResponse getIndexJobStatusResponse = null;
getIndexJobStatusResponse = client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers,
runtime);
return getIndexJobStatusResponse;
}
/**
* 使用阿里云百炼服务创建知识库。
*
* @param filePath 文档本地路径
* @param workspaceId 业务空间ID
* @param name 知识库名称
* @return 如果成功返回知识库ID否则返回 null
*/
public static String createKnowledgeBase(String filePath, String workspaceId, String name) {
// 设置默认值
String categoryId = "default";
String parser = "DASHSCOPE_DOCMIND";
String sourceType = "DATA_CENTER_FILE";
String structureType = "unstructured";
String sinkType = "DEFAULT";
try {
// 步骤1初始化客户端Client
System.out.println("步骤1初始化Client");
com.aliyun.bailian20231229.Client client = createClient();
// 步骤2准备文档信息
System.out.println("步骤2准备文档信息");
String fileName = new File(filePath).getName();
String fileMd5 = calculateMD5(filePath);
String fileSize = getFileSize(filePath);
// 步骤3申请上传租约
System.out.println("步骤3向阿里云百炼申请上传租约");
ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize,
workspaceId);
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
// 步骤4上传文档
System.out.println("步骤4上传文档到阿里云百炼");
// 请自行安装jackson-databind
// 将上一步的uploadHeaders转换为Map(Key-Value形式)
ObjectMapper mapper = new ObjectMapper();
Map<String, String> uploadHeadersMap = (Map<String, String>) mapper
.readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
uploadFile(uploadUrl, uploadHeadersMap, filePath);
// 步骤5将文档添加到服务器
System.out.println("步骤5将文档添加到阿里云百炼服务器");
AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
String fileId = addResponse.getBody().getData().getFileId();
// 步骤6检查文档状态
System.out.println("步骤6检查阿里云百炼中的文档状态");
while (true) {
DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
String status = describeResponse.getBody().getData().getStatus();
System.out.println("当前文档状态:" + status);
if (status.equals("INIT")) {
System.out.println("文档待解析,请稍候...");
} else if (status.equals("PARSING")) {
System.out.println("文档解析中,请稍候...");
} else if (status.equals("PARSE_SUCCESS")) {
System.out.println("文档解析完成!");
break;
} else {
System.out.println("未知的文档状态:" + status + ",请联系技术支持。");
return null;
}
TimeUnit.SECONDS.sleep(5);
}
// 步骤7初始化知识库
System.out.println("步骤7在阿里云百炼中创建知识库");
CreateIndexResponse indexResponse = createIndex(client, workspaceId, fileId, name, structureType,
sourceType, sinkType);
String indexId = indexResponse.getBody().getData().getId();
// 步骤8提交索引任务
System.out.println("步骤8向阿里云百炼提交索引任务");
SubmitIndexJobResponse submitResponse = submitIndex(client, workspaceId, indexId);
String jobId = submitResponse.getBody().getData().getId();
// 步骤9获取索引任务状态
System.out.println("步骤9获取阿里云百炼索引任务状态");
while (true) {
GetIndexJobStatusResponse getStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
String status = getStatusResponse.getBody().getData().getStatus();
System.out.println("当前索引任务状态:" + status);
if (status.equals("COMPLETED")) {
break;
}
TimeUnit.SECONDS.sleep(5);
}
System.out.println("阿里云百炼知识库创建成功!");
return indexId;
} catch (Exception e) {
System.out.println("发生错误:" + e.getMessage());
e.printStackTrace();
return null;
}
}
/**
* 主函数。
*/
public static void main(String[] args) {
Scanner scanner = new Scanner(System.in);
if (!checkEnvironmentVariables()) {
return;
}
System.out.print("请输入您需要上传文档的实际本地路径以Linux为例/xxx/xxx/阿里云百炼系列手机产品介绍.docx");
String filePath = scanner.nextLine();
System.out.print("请为您的知识库输入一个名称:");
String kbName = scanner.nextLine();
String workspaceId = System.getenv("WORKSPACE_ID");
String result = createKnowledgeBase(filePath, workspaceId, kbName);
if (result != null) {
System.out.println("知识库ID: " + result);
}
}
}
//package com.gxwebsoft.ai.util;
//
//import com.aliyun.bailian20231229.models.*;
//import com.fasterxml.jackson.databind.ObjectMapper;
//
//import java.io.File;
//import java.io.FileInputStream;
//import java.net.HttpURLConnection;
//import java.net.URL;
//import java.security.MessageDigest;
//import java.util.*;
//import java.util.concurrent.TimeUnit;
//
///**
// * 创建知识库
// * @author GIIT-YC
// *
// */
//public class KnowledgeBaseCreate {
//
// String ALIBABA_CLOUD_ACCESS_KEY_ID = "LTAI5tD5YRKuxWz6Eg7qrM4P";
// String ALIBABA_CLOUD_ACCESS_KEY_SECRET = "bO8TBDXflOwbtSKimPpG8XrJnyzgTk";
// String WORKSPACE_ID = "llm-4pf5auwewoz34zqu";
//
// /**
// * 检查并提示设置必要的环境变量。
// *
// * @return true 如果所有必需的环境变量都已设置,否则 false
// */
// public static boolean checkEnvironmentVariables() {
// Map<String, String> requiredVars = new HashMap<>();
// requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
// requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
// requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");
//
// List<String> missingVars = new ArrayList<>();
// for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
// String value = System.getenv(entry.getKey());
// if (value == null || value.isEmpty()) {
// missingVars.add(entry.getKey());
// System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
// }
// }
//
// return missingVars.isEmpty();
// }
//
// /**
// * 计算文档的MD5值。
// *
// * @param filePath 文档本地路径
// * @return 文档的MD5值
// * @throws Exception 如果计算过程中发生错误
// */
// public static String calculateMD5(String filePath) throws Exception {
// MessageDigest md = MessageDigest.getInstance("MD5");
// try (FileInputStream fis = new FileInputStream(filePath)) {
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = fis.read(buffer)) != -1) {
// md.update(buffer, 0, bytesRead);
// }
// }
// StringBuilder sb = new StringBuilder();
// for (byte b : md.digest()) {
// sb.append(String.format("%02x", b & 0xff));
// }
// return sb.toString();
// }
//
// /**
// * 获取文档大小(以字节为单位)。
// *
// * @param filePath 文档本地路径
// * @return 文档大小(以字节为单位)
// */
// public static String getFileSize(String filePath) {
// File file = new File(filePath);
// long fileSize = file.length();
// return String.valueOf(fileSize);
// }
//
// /**
// * 初始化客户端Client
// *
// * @return 配置好的客户端对象
// */
// public static com.aliyun.bailian20231229.Client createClient() throws Exception {
// com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
// .setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"))
// .setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));
// // 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。
// config.endpoint = "bailian.cn-beijing.aliyuncs.com";
// return new com.aliyun.bailian20231229.Client(config);
// }
//
// /**
// * 申请文档上传租约。
// *
// * @param client 客户端对象
// * @param categoryId 类目ID
// * @param fileName 文档名称
// * @param fileMd5 文档的MD5值
// * @param fileSize 文档大小(以字节为单位)
// * @param workspaceId 业务空间ID
// * @return 阿里云百炼服务的响应对象
// */
// public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId,
// String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
// applyFileUploadLeaseRequest.setFileName(fileName);
// applyFileUploadLeaseRequest.setMd5(fileMd5);
// applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null;
// applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId,
// applyFileUploadLeaseRequest, headers, runtime);
// return applyFileUploadLeaseResponse;
// }
//
// /**
// * 上传文档到临时存储。
// *
// * @param preSignedUrl 上传租约中的 URL
// * @param headers 上传请求的头部
// * @param filePath 文档本地路径
// * @throws Exception 如果上传过程中发生错误
// */
// public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception {
// File file = new File(filePath);
// if (!file.exists() || !file.isFile()) {
// throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
// }
//
// try (FileInputStream fis = new FileInputStream(file)) {
// URL url = new URL(preSignedUrl);
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
// conn.setRequestMethod("PUT");
// conn.setDoOutput(true);
//
// // 设置上传请求头
// conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
// conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
//
// // 分块读取并上传文档
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = fis.read(buffer)) != -1) {
// conn.getOutputStream().write(buffer, 0, bytesRead);
// }
//
// int responseCode = conn.getResponseCode();
// if (responseCode != 200) {
// throw new RuntimeException("上传失败: " + responseCode);
// }
// }
// }
//
// /**
// * 将文档添加到类目中。
// *
// * @param client 客户端对象
// * @param leaseId 租约ID
// * @param parser 用于文档的解析器
// * @param categoryId 类目ID
// * @param workspaceId 业务空间ID
// * @return 阿里云百炼服务的响应对象
// */
// public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser,
// String categoryId, String workspaceId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
// addFileRequest.setLeaseId(leaseId);
// addFileRequest.setParser(parser);
// addFileRequest.setCategoryId(categoryId);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
// }
//
// /**
// * 查询文档的基本信息。
// *
// * @param client 客户端对象
// * @param workspaceId 业务空间ID
// * @param fileId 文档ID
// * @return 阿里云百炼服务的响应对象
// */
// public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId,
// String fileId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.describeFileWithOptions(workspaceId, fileId, headers, runtime);
// }
//
// /**
// * 在阿里云百炼服务中创建知识库(初始化)。
// *
// * @param client 客户端对象
// * @param workspaceId 业务空间ID
// * @param fileId 文档ID
// * @param name 知识库名称
// * @param structureType 知识库的数据类型
// * @param sourceType 应用数据的数据类型,支持类目类型和文档类型
// * @param sinkType 知识库的向量存储类型
// * @return 阿里云百炼服务的响应对象
// */
// public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
// String fileId, String name, String structureType, String sourceType, String sinkType) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest();
// createIndexRequest.setStructureType(structureType);
// createIndexRequest.setName(name);
// createIndexRequest.setSourceType(sourceType);
// createIndexRequest.setSinkType(sinkType);
// createIndexRequest.setDocumentIds(Collections.singletonList(fileId));
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime);
// }
//
// /**
// * 向阿里云百炼服务提交索引任务。
// *
// * @param client 客户端对象
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @return 阿里云百炼服务的响应对象
// */
// public static SubmitIndexJobResponse submitIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
// String indexId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.SubmitIndexJobRequest submitIndexJobRequest = new com.aliyun.bailian20231229.models.SubmitIndexJobRequest();
// submitIndexJobRequest.setIndexId(indexId);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.submitIndexJobWithOptions(workspaceId, submitIndexJobRequest, headers, runtime);
// }
//
// /**
// * 查询索引任务状态。
// *
// * @param client 客户端对象
// * @param workspaceId 业务空间ID
// * @param jobId 任务ID
// * @param indexId 知识库ID
// * @return 阿里云百炼服务的响应对象
// */
// public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client,
// String workspaceId, String jobId, String indexId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest();
// getIndexJobStatusRequest.setIndexId(indexId);
// getIndexJobStatusRequest.setJobId(jobId);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// GetIndexJobStatusResponse getIndexJobStatusResponse = null;
// getIndexJobStatusResponse = client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers,
// runtime);
// return getIndexJobStatusResponse;
// }
//
// /**
// * 使用阿里云百炼服务创建知识库。
// *
// * @param filePath 文档本地路径
// * @param workspaceId 业务空间ID
// * @param name 知识库名称
// * @return 如果成功返回知识库ID否则返回 null
// */
// public static String createKnowledgeBase(String filePath, String workspaceId, String name) {
// // 设置默认值
// String categoryId = "default";
// String parser = "DASHSCOPE_DOCMIND";
// String sourceType = "DATA_CENTER_FILE";
// String structureType = "unstructured";
// String sinkType = "DEFAULT";
// try {
// // 步骤1初始化客户端Client
// System.out.println("步骤1初始化Client");
// com.aliyun.bailian20231229.Client client = createClient();
//
// // 步骤2准备文档信息
// System.out.println("步骤2准备文档信息");
// String fileName = new File(filePath).getName();
// String fileMd5 = calculateMD5(filePath);
// String fileSize = getFileSize(filePath);
//
// // 步骤3申请上传租约
// System.out.println("步骤3向阿里云百炼申请上传租约");
// ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize,
// workspaceId);
// String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
// String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
// Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
//
// // 步骤4上传文档
// System.out.println("步骤4上传文档到阿里云百炼");
// // 请自行安装jackson-databind
// // 将上一步的uploadHeaders转换为Map(Key-Value形式)
// ObjectMapper mapper = new ObjectMapper();
// Map<String, String> uploadHeadersMap = (Map<String, String>) mapper
// .readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
// uploadFile(uploadUrl, uploadHeadersMap, filePath);
//
// // 步骤5将文档添加到服务器
// System.out.println("步骤5将文档添加到阿里云百炼服务器");
// AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
// String fileId = addResponse.getBody().getData().getFileId();
//
// // 步骤6检查文档状态
// System.out.println("步骤6检查阿里云百炼中的文档状态");
// while (true) {
// DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
// String status = describeResponse.getBody().getData().getStatus();
// System.out.println("当前文档状态:" + status);
//
// if (status.equals("INIT")) {
// System.out.println("文档待解析,请稍候...");
// } else if (status.equals("PARSING")) {
// System.out.println("文档解析中,请稍候...");
// } else if (status.equals("PARSE_SUCCESS")) {
// System.out.println("文档解析完成!");
// break;
// } else {
// System.out.println("未知的文档状态:" + status + ",请联系技术支持。");
// return null;
// }
// TimeUnit.SECONDS.sleep(5);
// }
//
// // 步骤7初始化知识库
// System.out.println("步骤7在阿里云百炼中创建知识库");
// CreateIndexResponse indexResponse = createIndex(client, workspaceId, fileId, name, structureType,
// sourceType, sinkType);
// String indexId = indexResponse.getBody().getData().getId();
//
// // 步骤8提交索引任务
// System.out.println("步骤8向阿里云百炼提交索引任务");
// SubmitIndexJobResponse submitResponse = submitIndex(client, workspaceId, indexId);
// String jobId = submitResponse.getBody().getData().getId();
//
// // 步骤9获取索引任务状态
// System.out.println("步骤9获取阿里云百炼索引任务状态");
// while (true) {
// GetIndexJobStatusResponse getStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
// String status = getStatusResponse.getBody().getData().getStatus();
// System.out.println("当前索引任务状态:" + status);
//
// if (status.equals("COMPLETED")) {
// break;
// }
// TimeUnit.SECONDS.sleep(5);
// }
//
// System.out.println("阿里云百炼知识库创建成功!");
// return indexId;
//
// } catch (Exception e) {
// System.out.println("发生错误:" + e.getMessage());
// e.printStackTrace();
// return null;
// }
// }
//
// /**
// * 主函数。
// */
// public static void main(String[] args) {
// Scanner scanner = new Scanner(System.in);
// if (!checkEnvironmentVariables()) {
// return;
// }
//
// System.out.print("请输入您需要上传文档的实际本地路径以Linux为例/xxx/xxx/阿里云百炼系列手机产品介绍.docx");
// String filePath = scanner.nextLine();
//
// System.out.print("请为您的知识库输入一个名称:");
// String kbName = scanner.nextLine();
//
// String workspaceId = System.getenv("WORKSPACE_ID");
// String result = createKnowledgeBase(filePath, workspaceId, kbName);
// if (result != null) {
// System.out.println("知识库ID: " + result);
// }
// }
//}

View File

@@ -1,145 +1,145 @@
package com.gxwebsoft.ai.util;
import com.aliyun.bailian20231229.models.DeleteIndexResponse;
import com.aliyun.bailian20231229.models.ListIndicesResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.*;
/**
 * Command-line helper that lists the knowledge bases of an Alibaba Cloud Bailian
 * workspace or permanently deletes one of them.
 *
 * @author GIIT-YC
 */
public class KnowledgeBaseManage {

    // SECURITY: access keys must never be embedded in source. The values are taken from
    // the same environment variables that checkEnvironmentVariables() validates.
    // NOTE(review): any key that was ever committed to version control should be rotated.
    String ALIBABA_CLOUD_ACCESS_KEY_ID = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
    String ALIBABA_CLOUD_ACCESS_KEY_SECRET = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    String WORKSPACE_ID = System.getenv("WORKSPACE_ID");

    /**
     * Checks that every required environment variable is set and non-empty,
     * printing one error line per missing variable.
     *
     * @return {@code true} if all required variables are set, {@code false} otherwise
     */
    public static boolean checkEnvironmentVariables() {
        Map<String, String> requiredVars = new HashMap<>();
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
        requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");
        List<String> missingVars = new ArrayList<>();
        for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
            String value = System.getenv(entry.getKey());
            if (value == null || value.isEmpty()) {
                missingVars.add(entry.getKey());
                System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
            }
        }
        return missingVars.isEmpty();
    }

    /**
     * Creates and configures the Bailian client.
     *
     * <p>Credentials come from a no-argument {@code com.aliyun.credentials.Client};
     * presumably this resolves them through the SDK's default provider chain
     * (environment variables included) — TODO confirm against the SDK documentation.
     *
     * @return the configured client
     * @throws Exception if the SDK fails to build the client
     */
    public static com.aliyun.bailian20231229.Client createClient() throws Exception {
        com.aliyun.credentials.Client credential = new com.aliyun.credentials.Client();
        com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
                .setCredential(credential);
        // Public-cloud, public-network endpoint; replace it if another endpoint is required.
        config.endpoint = "bailian.cn-beijing.aliyuncs.com";
        return new com.aliyun.bailian20231229.Client(config);
    }

    /**
     * Lists the knowledge bases of the given workspace.
     *
     * @param client      Bailian client
     * @param workspaceId workspace ID
     * @return the service response
     * @throws Exception if the request fails
     */
    public static ListIndicesResponse listIndices(com.aliyun.bailian20231229.Client client, String workspaceId)
            throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.bailian20231229.models.ListIndicesRequest listIndicesRequest = new com.aliyun.bailian20231229.models.ListIndicesRequest();
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.listIndicesWithOptions(workspaceId, listIndicesRequest, headers, runtime);
    }

    /**
     * Permanently deletes the given knowledge base.
     *
     * @param client      Bailian client
     * @param workspaceId workspace ID
     * @param indexId     knowledge-base ID
     * @return the service response
     * @throws Exception if the request fails
     */
    public static DeleteIndexResponse deleteIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
            String indexId) throws Exception {
        Map<String, String> headers = new HashMap<>();
        com.aliyun.bailian20231229.models.DeleteIndexRequest deleteIndexRequest = new com.aliyun.bailian20231229.models.DeleteIndexRequest();
        deleteIndexRequest.setIndexId(indexId);
        com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
        return client.deleteIndexWithOptions(workspaceId, deleteIndexRequest, headers, runtime);
    }

    /**
     * Interactive entry point: option 1 prints the workspace's knowledge bases as JSON,
     * option 2 deletes a knowledge base after an explicit y/n confirmation.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        if (!checkEnvironmentVariables()) {
            System.out.println("环境变量校验未通过。");
            return;
        }
        // try-with-resources closes the Scanner on every exit path, including the early
        // return taken when the user cancels the deletion.
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.print("请选择要执行的操作:\n1. 查看知识库\n2. 删除知识库\n请输入选项1或2");
            String startOption = scanner.nextLine();
            com.aliyun.bailian20231229.Client client = createClient();
            String workspaceId = System.getenv("WORKSPACE_ID");
            ObjectMapper mapper = new ObjectMapper();
            if (startOption.equals("1")) {
                // List the knowledge bases and dump the response data as JSON.
                System.out.println("\n执行查看知识库");
                ListIndicesResponse response = listIndices(client, workspaceId);
                String result = mapper.writeValueAsString(response.getBody().getData());
                System.out.println(result);
            } else if (startOption.equals("2")) {
                System.out.println("\n执行删除知识库");
                // The ID is the Data.Id returned by CreateIndex; it is also shown on the
                // knowledge-base page of the Bailian console.
                System.out.print("请输入知识库ID");
                String indexId = scanner.nextLine();
                // Ask again until the user answers y (proceed) or n (abort).
                while (true) {
                    System.out.print("您确定要永久性删除该知识库 " + indexId + " 吗?(y/n): ");
                    String input = scanner.nextLine().trim().toLowerCase();
                    if (input.equals("y")) {
                        break;
                    }
                    if (input.equals("n")) {
                        System.out.println("已取消删除操作。");
                        return;
                    }
                    System.out.println("无效输入,请输入 y 或 n。");
                }
                DeleteIndexResponse resp = deleteIndex(client, workspaceId, indexId);
                // Constant-first comparison: a missing body or status is reported as an
                // error instead of raising a NullPointerException.
                if (resp.getBody() != null && "200".equals(resp.getBody().getStatus())) {
                    System.out.println("知识库" + indexId + "删除成功!");
                } else {
                    System.out.println("发生错误:" + mapper.writeValueAsString(resp.getBody()));
                }
            } else {
                System.out.println("无效的选项,程序退出。");
            }
        } catch (Exception e) {
            System.out.println("发生错误:" + e.getMessage());
        }
    }
}
//package com.gxwebsoft.ai.util;
//
//import com.aliyun.bailian20231229.models.DeleteIndexResponse;
//import com.aliyun.bailian20231229.models.ListIndicesResponse;
//import com.fasterxml.jackson.databind.ObjectMapper;
//
//import java.util.*;
//
///**
// * 管理知识库
// * @author GIIT-YC
// *
// */
//public class KnowledgeBaseManage {
//
// String ALIBABA_CLOUD_ACCESS_KEY_ID = "LTAI5tD5YRKuxWz6Eg7qrM4P";
// String ALIBABA_CLOUD_ACCESS_KEY_SECRET = "bO8TBDXflOwbtSKimPpG8XrJnyzgTk";
// String WORKSPACE_ID = "llm-4pf5auwewoz34zqu";
//
// /**
// * 检查并提示设置必要的环境变量。
// *
// * @return true 如果所有必需的环境变量都已设置,否则 false
// */
// public static boolean checkEnvironmentVariables() {
// Map<String, String> requiredVars = new HashMap<>();
// requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
// requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
// requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");
//
// List<String> missingVars = new ArrayList<>();
// for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
// String value = System.getenv(entry.getKey());
// if (value == null || value.isEmpty()) {
// missingVars.add(entry.getKey());
// System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
// }
// }
//
// return missingVars.isEmpty();
// }
//
// /**
// * 创建并配置客户端Client
// *
// * @return 配置好的客户端Client
// */
// public static com.aliyun.bailian20231229.Client createClient() throws Exception {
// com.aliyun.credentials.Client credential = new com.aliyun.credentials.Client();
// com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
// .setCredential(credential);
// // 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。
// config.endpoint = "bailian.cn-beijing.aliyuncs.com";
// return new com.aliyun.bailian20231229.Client(config);
// }
//
// /**
// * 获取指定业务空间下一个或多个知识库的详细信息
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @return 阿里云百炼服务的响应
// */
// public static ListIndicesResponse listIndices(com.aliyun.bailian20231229.Client client, String workspaceId)
// throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.ListIndicesRequest listIndicesRequest = new com.aliyun.bailian20231229.models.ListIndicesRequest();
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.listIndicesWithOptions(workspaceId, listIndicesRequest, headers, runtime);
// }
//
// /**
// * 永久性删除指定的知识库
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @return 阿里云百炼服务的响应
// */
// public static DeleteIndexResponse deleteIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
// String indexId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.DeleteIndexRequest deleteIndexRequest = new com.aliyun.bailian20231229.models.DeleteIndexRequest();
// deleteIndexRequest.setIndexId(indexId);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.deleteIndexWithOptions(workspaceId, deleteIndexRequest, headers, runtime);
// }
//
// /**
// * 主函数
// */
// public static void main(String[] args) {
// if (!checkEnvironmentVariables()) {
// System.out.println("环境变量校验未通过。");
// return;
// }
//
// try {
// Scanner scanner = new Scanner(System.in);
// System.out.print("请选择要执行的操作:\n1. 查看知识库\n2. 删除知识库\n请输入选项1或2");
// String startOption = scanner.nextLine();
// com.aliyun.bailian20231229.Client client = createClient();
// if (startOption.equals("1")) {
// // 查看知识库
// System.out.println("\n执行查看知识库");
// String workspaceId = System.getenv("WORKSPACE_ID");
// ListIndicesResponse response = listIndices(client, workspaceId);
// // 请自行安装jackson-databind。将响应转换为 JSON 字符串
// ObjectMapper mapper = new ObjectMapper();
// String result = mapper.writeValueAsString(response.getBody().getData());
// System.out.println(result);
// } else if (startOption.equals("2")) {
// System.out.println("\n执行删除知识库");
// String workspaceId = System.getenv("WORKSPACE_ID");
// System.out.print("请输入知识库ID"); // 即 CreateIndex 接口返回的 Data.Id您也可以在阿里云百炼控制台的知识库页面获取。
// String indexId = scanner.nextLine();
// // 删除前二次确认
// boolean confirm = false;
// while (!confirm) {
// System.out.print("您确定要永久性删除该知识库 " + indexId + " 吗?(y/n): ");
// String input = scanner.nextLine().trim().toLowerCase();
// if (input.equals("y")) {
// confirm = true;
// } else if (input.equals("n")) {
// System.out.println("已取消删除操作。");
// return;
// } else {
// System.out.println("无效输入,请输入 y 或 n。");
// }
// }
// DeleteIndexResponse resp = deleteIndex(client, workspaceId, indexId);
// if (resp.getBody().getStatus().equals("200")) {
// System.out.println("知识库" + indexId + "删除成功!");
// } else {
// ObjectMapper mapper = new ObjectMapper();
// System.out.println("发生错误:" + mapper.writeValueAsString(resp.getBody()));
// }
// } else {
// System.out.println("无效的选项,程序退出。");
// }
// } catch (Exception e) {
// System.out.println("发生错误:" + e.getMessage());
// }
// }
//}

View File

@@ -1,110 +1,110 @@
package com.gxwebsoft.ai.util;
import com.aliyun.bailian20231229.models.RetrieveRequest;
import com.aliyun.bailian20231229.models.RetrieveResponse;
import com.aliyun.teautil.models.RuntimeOptions;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.*;
/**
 * Command-line helper that runs a retrieval query against an Alibaba Cloud Bailian
 * knowledge base and prints the response body as JSON.
 *
 * @author GIIT-YC
 */
public class KnowledgeBaseRetrieve {

    // SECURITY: access keys must never be embedded in source. The values are taken from
    // the same environment variables that checkEnvironmentVariables() validates.
    // NOTE(review): any key that was ever committed to version control should be rotated.
    static String ALIBABA_CLOUD_ACCESS_KEY_ID = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
    static String ALIBABA_CLOUD_ACCESS_KEY_SECRET = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    static String WORKSPACE_ID = System.getenv("WORKSPACE_ID");

    /**
     * Checks that every required environment variable is set and non-empty,
     * printing one error line per missing variable.
     *
     * @return {@code true} if all required variables are set, {@code false} otherwise
     */
    public static boolean checkEnvironmentVariables() {
        Map<String, String> requiredVars = new HashMap<>();
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
        requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
        requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");
        List<String> missingVars = new ArrayList<>();
        for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
            String value = System.getenv(entry.getKey());
            if (value == null || value.isEmpty()) {
                missingVars.add(entry.getKey());
                System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
            }
        }
        return missingVars.isEmpty();
    }

    /**
     * Builds the Bailian client from the access key held in the static fields of this class.
     *
     * @return the configured client
     * @throws Exception if the SDK fails to build the client
     */
    public static com.aliyun.bailian20231229.Client createClient() throws Exception {
        com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
                .setAccessKeyId(ALIBABA_CLOUD_ACCESS_KEY_ID)
                .setAccessKeySecret(ALIBABA_CLOUD_ACCESS_KEY_SECRET);
        // Public-cloud, public-network endpoint; replace it if another endpoint is required.
        config.endpoint = "bailian.cn-beijing.aliyuncs.com";
        return new com.aliyun.bailian20231229.Client(config);
    }

    /**
     * Retrieves information from the given knowledge base.
     *
     * @param client      Bailian client
     * @param workspaceId workspace ID
     * @param indexId     knowledge-base ID
     * @param query       retrieval query text
     * @return the service response
     * @throws Exception if the request fails
     */
    public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
            String indexId, String query) throws Exception {
        RetrieveRequest retrieveRequest = new RetrieveRequest();
        retrieveRequest.setIndexId(indexId);
        retrieveRequest.setQuery(query);
        // Explicitly left unset; presumably the service default top-K then applies — TODO confirm.
        retrieveRequest.setDenseSimilarityTopK(null);
        RuntimeOptions runtime = new RuntimeOptions();
        return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
    }

    /**
     * Interactive entry point: prompts for a knowledge-base ID and a query, runs the
     * retrieval and prints the response body as JSON.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // The credentials now come from the environment, so a missing variable is
        // reported up front instead of surfacing later as an SDK error.
        if (!checkEnvironmentVariables()) {
            System.out.println("环境变量校验未通过。");
            return;
        }
        // try-with-resources closes the Scanner on every exit path.
        try (Scanner scanner = new Scanner(System.in)) {
            // Step 1: create the client.
            System.out.println("步骤1创建Client");
            com.aliyun.bailian20231229.Client client = createClient();
            // Step 2: query the knowledge base.
            System.out.println("步骤2检索知识库");
            // The ID is the Data.Id returned by CreateIndex; it is also shown on the
            // knowledge-base page of the Bailian console.
            System.out.print("请输入知识库ID");
            String indexId = scanner.nextLine();
            System.out.print("请输入检索query");
            String query = scanner.nextLine();
            String workspaceId = WORKSPACE_ID;
            RetrieveResponse resp = retrieveIndex(client, workspaceId, indexId, query);
            // Serialize the response body to JSON for display.
            ObjectMapper mapper = new ObjectMapper();
            String result = mapper.writeValueAsString(resp.getBody());
            System.out.println(result);
        } catch (Exception e) {
            System.out.println("发生错误:" + e.getMessage());
        }
    }
}
//
// return missingVars.isEmpty();
// }
//
// /**
// * 初始化客户端Client
// *
// * @return 配置好的客户端对象
// */
// public static com.aliyun.bailian20231229.Client createClient() throws Exception {
// com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
// .setAccessKeyId(ALIBABA_CLOUD_ACCESS_KEY_ID)
// .setAccessKeySecret(ALIBABA_CLOUD_ACCESS_KEY_SECRET);
// // 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。
// config.endpoint = "bailian.cn-beijing.aliyuncs.com";
// return new com.aliyun.bailian20231229.Client(config);
// }
//
// /**
// * 在指定的知识库中检索信息。
// *
// * @param client 客户端对象bailian20231229Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @param query 检索查询语句
// * @return 阿里云百炼服务的响应
// */
// public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId,
// String indexId, String query) throws Exception {
// RetrieveRequest retrieveRequest = new RetrieveRequest();
// retrieveRequest.setIndexId(indexId);
// retrieveRequest.setQuery(query);
// retrieveRequest.setDenseSimilarityTopK(null);
// RuntimeOptions runtime = new RuntimeOptions();
// return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
// }
//
// /**
// * 使用阿里云百炼服务检索知识库。
// */
// public static void main(String[] args) {
//// if (!checkEnvironmentVariables()) {
//// System.out.println("环境变量校验未通过。");
//// return;
//// }
//
// try {
// // 步骤1初始化客户端Client
// System.out.println("步骤1创建Client");
// com.aliyun.bailian20231229.Client client = createClient();
//
// // 步骤2检索知识库
// System.out.println("步骤2检索知识库");
// Scanner scanner = new Scanner(System.in);
// System.out.print("请输入知识库ID"); // 即 CreateIndex 接口返回的 Data.Id您也可以在阿里云百炼控制台的知识库页面获取。
// String indexId = scanner.nextLine();
// System.out.print("请输入检索query");
// String query = scanner.nextLine();
// String workspaceId = WORKSPACE_ID;
// RetrieveResponse resp = retrieveIndex(client, workspaceId, indexId, query);
//
// // 请自行安装jackson-databind。将响应体responsebody转换为 JSON 字符串
// ObjectMapper mapper = new ObjectMapper();
// String result = mapper.writeValueAsString(resp.getBody());
// System.out.println(result);
// } catch (Exception e) {
// System.out.println("发生错误:" + e.getMessage());
// }
// }
//}

View File

@@ -1,384 +1,384 @@
package com.gxwebsoft.ai.util;
import com.aliyun.bailian20231229.models.*;
import com.aliyun.teautil.models.RuntimeOptions;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.FileInputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.util.*;
/**
* 更新知识库
* @author GIIT-YC
*
*/
public class KnowledgeBaseUpdate {
// SECURITY: access keys must never be embedded in source. The values are read from the
// same environment variables that checkEnvironmentVariables() validates and that
// createClient() uses.
// NOTE(review): any key that was ever committed to version control should be rotated.
String ALIBABA_CLOUD_ACCESS_KEY_ID = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
String ALIBABA_CLOUD_ACCESS_KEY_SECRET = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
String WORKSPACE_ID = System.getenv("WORKSPACE_ID");
/**
 * Verifies that the environment variables this tool needs are present.
 *
 * <p>For each variable that is unset or empty, one error line naming the variable
 * and its description is printed to standard output.
 *
 * @return {@code true} when no required variable is missing, {@code false} otherwise
 */
public static boolean checkEnvironmentVariables() {
    // Variable name -> human-readable description used in the error message.
    Map<String, String> descriptions = new HashMap<>();
    descriptions.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
    descriptions.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
    descriptions.put("WORKSPACE_ID", "阿里云百炼业务空间ID");

    List<String> unset = new ArrayList<>();
    descriptions.forEach((varName, description) -> {
        String current = System.getenv(varName);
        boolean present = current != null && !current.isEmpty();
        if (!present) {
            unset.add(varName);
            System.out.println("错误:请设置 " + varName + " 环境变量 (" + description + ")");
        }
    });
    return unset.isEmpty();
}
/**
 * Builds a Bailian client whose access key ID and secret are read from the
 * {@code ALIBABA_CLOUD_ACCESS_KEY_ID} and {@code ALIBABA_CLOUD_ACCESS_KEY_SECRET}
 * environment variables.
 *
 * @return the configured client
 * @throws Exception if the SDK fails to build the client
 */
public static com.aliyun.bailian20231229.Client createClient() throws Exception {
    com.aliyun.teaopenapi.models.Config clientConfig = new com.aliyun.teaopenapi.models.Config();
    clientConfig.setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"));
    clientConfig.setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));
    // Public-cloud, public-network endpoint; swap in another endpoint when needed.
    clientConfig.endpoint = "bailian.cn-beijing.aliyuncs.com";
    return new com.aliyun.bailian20231229.Client(clientConfig);
}
/**
 * Computes the MD5 digest of a local file.
 *
 * @param filePath local path of the document
 * @return the digest as 32 lowercase hexadecimal characters
 * @throws Exception if the MD5 algorithm is unavailable or the file cannot be read
 */
public static String calculateMD5(String filePath) throws Exception {
    MessageDigest digest = MessageDigest.getInstance("MD5");
    try (FileInputStream in = new FileInputStream(filePath)) {
        // Feed the file to the digest in 4 KiB chunks so it is never fully loaded into memory.
        byte[] chunk = new byte[4096];
        for (int count = in.read(chunk); count != -1; count = in.read(chunk)) {
            digest.update(chunk, 0, count);
        }
    }
    byte[] hash = digest.digest();
    StringBuilder hex = new StringBuilder(hash.length * 2);
    for (byte value : hash) {
        // High nibble first, then low nibble; forDigit yields lowercase hex digits.
        hex.append(Character.forDigit((value >> 4) & 0xf, 16));
        hex.append(Character.forDigit(value & 0xf, 16));
    }
    return hex.toString();
}
/**
 * Returns the size of a local file, in bytes, as a decimal string.
 *
 * @param filePath local path of the document
 * @return the file length in bytes, formatted as a string
 */
public static String getFileSize(String filePath) {
    return Long.toString(new File(filePath).length());
}
/**
* 申请文档上传租约。
*
* @param client 客户端对象
* @param categoryId 类目ID
* @param fileName 文档名称
* @param fileMd5 文档的MD5值
* @param fileSize 文档大小(以字节为单位)
* @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象
*/
public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId,
String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
applyFileUploadLeaseRequest.setFileName(fileName);
applyFileUploadLeaseRequest.setMd5(fileMd5);
applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null;
applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId,
applyFileUploadLeaseRequest, headers, runtime);
return applyFileUploadLeaseResponse;
}
/**
* 上传文档到临时存储。
*
* @param preSignedUrl 上传租约中的 URL
* @param headers 上传请求的头部
* @param filePath 文档本地路径
* @throws Exception 如果上传过程中发生错误
*/
public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception {
File file = new File(filePath);
if (!file.exists() || !file.isFile()) {
throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
}
try (FileInputStream fis = new FileInputStream(file)) {
URL url = new URL(preSignedUrl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("PUT");
conn.setDoOutput(true);
// 设置上传请求头
conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
// 分块读取并上传文档
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
conn.getOutputStream().write(buffer, 0, bytesRead);
}
int responseCode = conn.getResponseCode();
if (responseCode != 200) {
throw new RuntimeException("上传失败: " + responseCode);
}
}
}
/**
* 将文档添加到类目中。
*
* @param client 客户端对象
* @param leaseId 租约ID
* @param parser 用于文档的解析器
* @param categoryId 类目ID
* @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象
*/
public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser,
String categoryId, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
addFileRequest.setLeaseId(leaseId);
addFileRequest.setParser(parser);
addFileRequest.setCategoryId(categoryId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
}
/**
* 查询文档的基本信息。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param fileId 文档ID
* @return 阿里云百炼服务的响应对象
*/
public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId,
String fileId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.describeFileWithOptions(workspaceId, fileId, headers, runtime);
}
/**
* 向一个非结构化知识库追加导入已解析的文档
*
* @param client 客户端Client
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param fileId 文档ID
* @param sourceType 数据类型
* @return 阿里云百炼服务的响应
*/
public static SubmitIndexAddDocumentsJobResponse submitIndexAddDocumentsJob(
com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId,
String sourceType) throws Exception {
Map<String, String> headers = new HashMap<>();
SubmitIndexAddDocumentsJobRequest submitIndexAddDocumentsJobRequest = new SubmitIndexAddDocumentsJobRequest();
submitIndexAddDocumentsJobRequest.setIndexId(indexId);
submitIndexAddDocumentsJobRequest.setDocumentIds(Collections.singletonList(fileId));
submitIndexAddDocumentsJobRequest.setSourceType(sourceType);
RuntimeOptions runtime = new RuntimeOptions();
return client.submitIndexAddDocumentsJobWithOptions(workspaceId, submitIndexAddDocumentsJobRequest, headers,
runtime);
}
/**
* 查询索引任务状态。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param jobId 任务ID
* @param indexId 知识库ID
* @return 阿里云百炼服务的响应对象
*/
public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client,
String workspaceId, String jobId, String indexId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest();
getIndexJobStatusRequest.setIndexId(indexId);
getIndexJobStatusRequest.setJobId(jobId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
GetIndexJobStatusResponse getIndexJobStatusResponse = null;
getIndexJobStatusResponse = client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers,
runtime);
return getIndexJobStatusResponse;
}
/**
* 从指定的非结构化知识库中永久删除一个或多个文档
*
* @param client 客户端Client
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param fileId 文档ID
* @return 阿里云百炼服务的响应
*/
public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client,
String workspaceId, String indexId, String fileId) throws Exception {
Map<String, String> headers = new HashMap<>();
DeleteIndexDocumentRequest deleteIndexDocumentRequest = new DeleteIndexDocumentRequest();
deleteIndexDocumentRequest.setIndexId(indexId);
deleteIndexDocumentRequest.setDocumentIds(Collections.singletonList(fileId));
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.deleteIndexDocumentWithOptions(workspaceId, deleteIndexDocumentRequest, headers, runtime);
}
/**
* 使用阿里云百炼服务更新知识库
*
* @param filePath 文档(更新后的)的实际本地路径
* @param workspaceId 业务空间ID
* @param indexId 需要更新的知识库ID
* @param oldFileId 需要更新的文档的FileID
* @return 如果成功返回知识库ID否则返回 null
*/
public static String updateKnowledgeBase(String filePath, String workspaceId, String indexId, String oldFileId) {
// 设置默认值
String categoryId = "default";
String parser = "DASHSCOPE_DOCMIND";
String sourceType = "DATA_CENTER_FILE";
try {
// 步骤1初始化客户端Client
System.out.println("步骤1创建Client");
com.aliyun.bailian20231229.Client client = createClient();
// 步骤2准备文档信息更新后的文档
System.out.println("步骤2准备文档信息");
String fileName = Paths.get(filePath).getFileName().toString();
String fileMd5 = calculateMD5(filePath);
String fileSize = getFileSize(filePath);
// 步骤3申请上传租约
System.out.println("步骤3向阿里云百炼申请上传租约");
ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize,
workspaceId);
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
// 步骤4上传文档到临时存储
System.out.println("步骤4上传文档到临时存储");
// 请自行安装jackson-databind
// 将上一步的uploadHeaders转换为Map(Key-Value形式)
ObjectMapper mapper = new ObjectMapper();
Map<String, String> uploadHeadersMap = (Map<String, String>) mapper
.readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
uploadFile(uploadUrl, uploadHeadersMap, filePath);
// 步骤5添加文档到类目中
System.out.println("步骤5添加文档到类目中");
AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
String fileId = addResponse.getBody().getData().getFileId();
// 步骤6检查更新后的文档状态
System.out.println("步骤6检查阿里云百炼中的文档状态");
while (true) {
DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
String status = describeResponse.getBody().getData().getStatus();
System.out.println("当前文档状态:" + status);
if ("INIT".equals(status)) {
System.out.println("文档待解析,请稍候...");
} else if ("PARSING".equals(status)) {
System.out.println("文档解析中,请稍候...");
} else if ("PARSE_SUCCESS".equals(status)) {
System.out.println("文档解析完成!");
break;
} else {
System.out.println("未知的文档状态:" + status + ",请联系技术支持。");
return null;
}
Thread.sleep(5000);
}
// 步骤7提交追加文档任务
System.out.println("步骤7提交追加文档任务");
SubmitIndexAddDocumentsJobResponse indexAddResponse = submitIndexAddDocumentsJob(client, workspaceId,
indexId, fileId, sourceType);
String jobId = indexAddResponse.getBody().getData().getId();
// 步骤8等待追加任务完成
System.out.println("步骤8等待追加任务完成");
while (true) {
GetIndexJobStatusResponse jobStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
String status = jobStatusResponse.getBody().getData().getStatus();
System.out.println("当前索引任务状态:" + status);
if ("COMPLETED".equals(status)) {
break;
}
Thread.sleep(5000);
}
// 步骤9删除旧文档
System.out.println("步骤9删除旧文档");
deleteIndexDocument(client, workspaceId, indexId, oldFileId);
System.out.println("阿里云百炼知识库更新成功!");
return indexId;
} catch (Exception e) {
System.out.println("发生错误:" + e.getMessage());
return null;
}
}
/**
* 主函数。
*/
public static void main(String[] args) {
if (!checkEnvironmentVariables()) {
System.out.println("环境变量校验未通过。");
return;
}
Scanner scanner = new Scanner(System.in);
System.out.print("请输入您需要上传文档更新后的的实际本地路径以Linux为例/xxx/xxx/阿里云百炼系列手机产品介绍.docx");
String filePath = scanner.nextLine();
System.out.print("请输入需要更新的知识库ID"); // 即 CreateIndex 接口返回的 Data.Id您也可以在阿里云百炼控制台的知识库页面获取。
String indexId = scanner.nextLine(); // 即 AddFile 接口返回的 FileId。您也可以在阿里云百炼控制台的应用数据页面单击文件名称旁的 ID 图标获取。
System.out.print("请输入需要更新的文档的 FileID");
String oldFileId = scanner.nextLine();
String workspaceId = System.getenv("WORKSPACE_ID");
String result = updateKnowledgeBase(filePath, workspaceId, indexId, oldFileId);
if (result != null) {
System.out.println("知识库更新成功返回知识库ID: " + result);
} else {
System.out.println("知识库更新失败。");
}
}
}
//package com.gxwebsoft.ai.util;
//
//import com.aliyun.bailian20231229.models.*;
//import com.aliyun.teautil.models.RuntimeOptions;
//import com.fasterxml.jackson.databind.ObjectMapper;
//
//import java.io.File;
//import java.io.FileInputStream;
//import java.net.HttpURLConnection;
//import java.net.URL;
//import java.nio.file.Paths;
//import java.security.MessageDigest;
//import java.util.*;
//
///**
// * 更新知识库
// * @author GIIT-YC
// *
// */
//public class KnowledgeBaseUpdate {
//
// String ALIBABA_CLOUD_ACCESS_KEY_ID = "LTAI5tD5YRKuxWz6Eg7qrM4P";
// String ALIBABA_CLOUD_ACCESS_KEY_SECRET = "bO8TBDXflOwbtSKimPpG8XrJnyzgTk";
// String WORKSPACE_ID = "llm-4pf5auwewoz34zqu";
//
// /**
// * 检查并提示设置必要的环境变量。
// *
// * @return true 如果所有必需的环境变量都已设置,否则 false
// */
// public static boolean checkEnvironmentVariables() {
// Map<String, String> requiredVars = new HashMap<>();
// requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_ID", "阿里云访问密钥ID");
// requiredVars.put("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "阿里云访问密钥密码");
// requiredVars.put("WORKSPACE_ID", "阿里云百炼业务空间ID");
//
// List<String> missingVars = new ArrayList<>();
// for (Map.Entry<String, String> entry : requiredVars.entrySet()) {
// String value = System.getenv(entry.getKey());
// if (value == null || value.isEmpty()) {
// missingVars.add(entry.getKey());
// System.out.println("错误:请设置 " + entry.getKey() + " 环境变量 (" + entry.getValue() + ")");
// }
// }
//
// return missingVars.isEmpty();
// }
//
// /**
// * 创建并配置客户端Client
// *
// * @return 配置好的客户端Client
// */
// public static com.aliyun.bailian20231229.Client createClient() throws Exception {
// com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
// .setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"))
// .setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));
// // 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。
// config.endpoint = "bailian.cn-beijing.aliyuncs.com";
// return new com.aliyun.bailian20231229.Client(config);
// }
//
// /**
// * 计算文档的MD5值
// *
// * @param filePath 文档本地路径
// * @return 文档的MD5值
// */
// public static String calculateMD5(String filePath) throws Exception {
// MessageDigest md = MessageDigest.getInstance("MD5");
// try (FileInputStream fis = new FileInputStream(filePath)) {
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = fis.read(buffer)) != -1) {
// md.update(buffer, 0, bytesRead);
// }
// }
// StringBuilder sb = new StringBuilder();
// for (byte b : md.digest()) {
// sb.append(String.format("%02x", b & 0xff));
// }
// return sb.toString();
// }
//
// /**
// * 获取文档大小(以字节为单位)
// *
// * @param filePath 文档本地路径
// * @return 文档大小(以字节为单位)
// */
// public static String getFileSize(String filePath) {
// File file = new File(filePath);
// long fileSize = file.length();
// return String.valueOf(fileSize);
// }
//
// /**
// * 申请文档上传租约。
// *
// * @param client 客户端对象
// * @param categoryId 类目ID
// * @param fileName 文档名称
// * @param fileMd5 文档的MD5值
// * @param fileSize 文档大小(以字节为单位)
// * @param workspaceId 业务空间ID
// * @return 阿里云百炼服务的响应对象
// */
// public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId,
// String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
// applyFileUploadLeaseRequest.setFileName(fileName);
// applyFileUploadLeaseRequest.setMd5(fileMd5);
// applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null;
// applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId,
// applyFileUploadLeaseRequest, headers, runtime);
// return applyFileUploadLeaseResponse;
// }
//
// /**
// * 上传文档到临时存储。
// *
// * @param preSignedUrl 上传租约中的 URL
// * @param headers 上传请求的头部
// * @param filePath 文档本地路径
// * @throws Exception 如果上传过程中发生错误
// */
// public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception {
// File file = new File(filePath);
// if (!file.exists() || !file.isFile()) {
// throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
// }
//
// try (FileInputStream fis = new FileInputStream(file)) {
// URL url = new URL(preSignedUrl);
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
// conn.setRequestMethod("PUT");
// conn.setDoOutput(true);
//
// // 设置上传请求头
// conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
// conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
//
// // 分块读取并上传文档
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = fis.read(buffer)) != -1) {
// conn.getOutputStream().write(buffer, 0, bytesRead);
// }
//
// int responseCode = conn.getResponseCode();
// if (responseCode != 200) {
// throw new RuntimeException("上传失败: " + responseCode);
// }
// }
// }
//
// /**
// * 将文档添加到类目中。
// *
// * @param client 客户端对象
// * @param leaseId 租约ID
// * @param parser 用于文档的解析器
// * @param categoryId 类目ID
// * @param workspaceId 业务空间ID
// * @return 阿里云百炼服务的响应对象
// */
// public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser,
// String categoryId, String workspaceId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
// addFileRequest.setLeaseId(leaseId);
// addFileRequest.setParser(parser);
// addFileRequest.setCategoryId(categoryId);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
// }
//
// /**
// * 查询文档的基本信息。
// *
// * @param client 客户端对象
// * @param workspaceId 业务空间ID
// * @param fileId 文档ID
// * @return 阿里云百炼服务的响应对象
// */
// public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId,
// String fileId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.describeFileWithOptions(workspaceId, fileId, headers, runtime);
// }
//
// /**
// * 向一个非结构化知识库追加导入已解析的文档
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @param fileId 文档ID
// * @param sourceType 数据类型
// * @return 阿里云百炼服务的响应
// */
// public static SubmitIndexAddDocumentsJobResponse submitIndexAddDocumentsJob(
// com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId,
// String sourceType) throws Exception {
// Map<String, String> headers = new HashMap<>();
// SubmitIndexAddDocumentsJobRequest submitIndexAddDocumentsJobRequest = new SubmitIndexAddDocumentsJobRequest();
// submitIndexAddDocumentsJobRequest.setIndexId(indexId);
// submitIndexAddDocumentsJobRequest.setDocumentIds(Collections.singletonList(fileId));
// submitIndexAddDocumentsJobRequest.setSourceType(sourceType);
// RuntimeOptions runtime = new RuntimeOptions();
// return client.submitIndexAddDocumentsJobWithOptions(workspaceId, submitIndexAddDocumentsJobRequest, headers,
// runtime);
// }
//
// /**
// * 查询索引任务状态。
// *
// * @param client 客户端对象
// * @param workspaceId 业务空间ID
// * @param jobId 任务ID
// * @param indexId 知识库ID
// * @return 阿里云百炼服务的响应对象
// */
// public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client,
// String workspaceId, String jobId, String indexId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest();
// getIndexJobStatusRequest.setIndexId(indexId);
// getIndexJobStatusRequest.setJobId(jobId);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// GetIndexJobStatusResponse getIndexJobStatusResponse = null;
// getIndexJobStatusResponse = client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers,
// runtime);
// return getIndexJobStatusResponse;
// }
//
// /**
// * 从指定的非结构化知识库中永久删除一个或多个文档
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @param fileId 文档ID
// * @return 阿里云百炼服务的响应
// */
// public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client,
// String workspaceId, String indexId, String fileId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// DeleteIndexDocumentRequest deleteIndexDocumentRequest = new DeleteIndexDocumentRequest();
// deleteIndexDocumentRequest.setIndexId(indexId);
// deleteIndexDocumentRequest.setDocumentIds(Collections.singletonList(fileId));
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.deleteIndexDocumentWithOptions(workspaceId, deleteIndexDocumentRequest, headers, runtime);
// }
//
// /**
// * 使用阿里云百炼服务更新知识库
// *
// * @param filePath 文档(更新后的)的实际本地路径
// * @param workspaceId 业务空间ID
// * @param indexId 需要更新的知识库ID
// * @param oldFileId 需要更新的文档的FileID
// * @return 如果成功返回知识库ID否则返回 null
// */
// public static String updateKnowledgeBase(String filePath, String workspaceId, String indexId, String oldFileId) {
// // 设置默认值
// String categoryId = "default";
// String parser = "DASHSCOPE_DOCMIND";
// String sourceType = "DATA_CENTER_FILE";
// try {
// // 步骤1初始化客户端Client
// System.out.println("步骤1创建Client");
// com.aliyun.bailian20231229.Client client = createClient();
//
// // 步骤2准备文档信息更新后的文档
// System.out.println("步骤2准备文档信息");
// String fileName = Paths.get(filePath).getFileName().toString();
// String fileMd5 = calculateMD5(filePath);
// String fileSize = getFileSize(filePath);
//
// // 步骤3申请上传租约
// System.out.println("步骤3向阿里云百炼申请上传租约");
// ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize,
// workspaceId);
// String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
// String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
// Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
//
// // 步骤4上传文档到临时存储
// System.out.println("步骤4上传文档到临时存储");
// // 请自行安装jackson-databind
// // 将上一步的uploadHeaders转换为Map(Key-Value形式)
// ObjectMapper mapper = new ObjectMapper();
// Map<String, String> uploadHeadersMap = (Map<String, String>) mapper
// .readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
// uploadFile(uploadUrl, uploadHeadersMap, filePath);
//
// // 步骤5添加文档到类目中
// System.out.println("步骤5添加文档到类目中");
// AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
// String fileId = addResponse.getBody().getData().getFileId();
//
// // 步骤6检查更新后的文档状态
// System.out.println("步骤6检查阿里云百炼中的文档状态");
// while (true) {
// DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
// String status = describeResponse.getBody().getData().getStatus();
// System.out.println("当前文档状态:" + status);
// if ("INIT".equals(status)) {
// System.out.println("文档待解析,请稍候...");
// } else if ("PARSING".equals(status)) {
// System.out.println("文档解析中,请稍候...");
// } else if ("PARSE_SUCCESS".equals(status)) {
// System.out.println("文档解析完成!");
// break;
// } else {
// System.out.println("未知的文档状态:" + status + ",请联系技术支持。");
// return null;
// }
// Thread.sleep(5000);
// }
//
// // 步骤7提交追加文档任务
// System.out.println("步骤7提交追加文档任务");
// SubmitIndexAddDocumentsJobResponse indexAddResponse = submitIndexAddDocumentsJob(client, workspaceId,
// indexId, fileId, sourceType);
// String jobId = indexAddResponse.getBody().getData().getId();
//
// // 步骤8等待追加任务完成
// System.out.println("步骤8等待追加任务完成");
// while (true) {
// GetIndexJobStatusResponse jobStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
// String status = jobStatusResponse.getBody().getData().getStatus();
// System.out.println("当前索引任务状态:" + status);
// if ("COMPLETED".equals(status)) {
// break;
// }
// Thread.sleep(5000);
// }
//
// // 步骤9删除旧文档
// System.out.println("步骤9删除旧文档");
// deleteIndexDocument(client, workspaceId, indexId, oldFileId);
//
// System.out.println("阿里云百炼知识库更新成功!");
// return indexId;
// } catch (Exception e) {
// System.out.println("发生错误:" + e.getMessage());
// return null;
// }
// }
//
// /**
// * 主函数。
// */
// public static void main(String[] args) {
// if (!checkEnvironmentVariables()) {
// System.out.println("环境变量校验未通过。");
// return;
// }
//
// Scanner scanner = new Scanner(System.in);
// System.out.print("请输入您需要上传文档更新后的的实际本地路径以Linux为例/xxx/xxx/阿里云百炼系列手机产品介绍.docx");
// String filePath = scanner.nextLine();
//
// System.out.print("请输入需要更新的知识库ID"); // 即 CreateIndex 接口返回的 Data.Id您也可以在阿里云百炼控制台的知识库页面获取。
// String indexId = scanner.nextLine(); // 即 AddFile 接口返回的 FileId。您也可以在阿里云百炼控制台的应用数据页面单击文件名称旁的 ID 图标获取。
//
// System.out.print("请输入需要更新的文档的 FileID");
// String oldFileId = scanner.nextLine();
//
// String workspaceId = System.getenv("WORKSPACE_ID");
// String result = updateKnowledgeBase(filePath, workspaceId, indexId, oldFileId);
// if (result != null) {
// System.out.println("知识库更新成功返回知识库ID: " + result);
// } else {
// System.out.println("知识库更新失败。");
// }
// }
//}

View File

@@ -1,303 +1,303 @@
package com.gxwebsoft.ai.util;
import com.aliyun.bailian20231229.Client;
import com.aliyun.bailian20231229.models.*;
import com.aliyun.teautil.models.RuntimeOptions;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.util.*;
import org.springframework.web.multipart.MultipartFile;
/**
* 知识库上传工具类
* @author GIIT-YC
*
*/
public class KnowledgeBaseUploader {
/**
* 上传文档到知识库直接处理MultipartFile
*
* @param client 阿里云客户端
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param file 上传的文件
* @return 新文档的FileID失败返回null
*/
public static String uploadDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, MultipartFile file) {
try {
// 准备文档信息
String fileName = file.getOriginalFilename();
String fileMd5 = calculateMD5(file.getInputStream());
String fileSize = String.valueOf(file.getSize());
// 申请上传租约
ApplyFileUploadLeaseRequest leaseRequest = new ApplyFileUploadLeaseRequest()
.setFileName(fileName)
.setMd5(fileMd5)
.setSizeInBytes(fileSize);
ApplyFileUploadLeaseResponse leaseResponse = client.applyFileUploadLeaseWithOptions(
"default", workspaceId, leaseRequest, new HashMap<>(), new RuntimeOptions());
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
// 上传文件
ObjectMapper mapper = new ObjectMapper();
Map<String, String> headers = mapper.readValue(mapper.writeValueAsString(leaseResponse.getBody().getData().getParam().getHeaders()), Map.class);
uploadFile(uploadUrl, headers, file);
// 添加文件到类目
AddFileRequest addRequest = new AddFileRequest()
.setLeaseId(leaseId)
.setParser("DASHSCOPE_DOCMIND")
.setCategoryId("default");
AddFileResponse addResponse = client.addFileWithOptions(workspaceId, addRequest, new HashMap<>(), new RuntimeOptions());
String fileId = addResponse.getBody().getData().getFileId();
// 等待文件解析完成
waitForFileParsing(client, workspaceId, fileId);
// 添加到知识库
SubmitIndexAddDocumentsJobRequest indexRequest = new SubmitIndexAddDocumentsJobRequest()
.setIndexId(indexId)
.setDocumentIds(Collections.singletonList(fileId))
.setSourceType("DATA_CENTER_FILE");
SubmitIndexAddDocumentsJobResponse indexResponse = client.submitIndexAddDocumentsJobWithOptions(workspaceId, indexRequest, new HashMap<>(), new RuntimeOptions());
// 等待索引完成
waitForIndexJob(client, workspaceId, indexResponse.getBody().getData().getId(), indexId);
return fileId;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
/**
* 批量上传文档到知识库
*/
public static List<String> uploadDocuments(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, MultipartFile[] files) {
List<String> fileIds = new ArrayList<>();
for (MultipartFile file : files) {
String fileId = uploadDocument(client, workspaceId, indexId, file);
if (fileId != null) {
fileIds.add(fileId);
}
}
return fileIds;
}
/**
* 上传文档到知识库
*
* @param client 阿里云客户端
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param filePath 文档本地路径
* @return 新文档的FileID失败返回null
*/
public static String uploadDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String filePath) {
try {
// 准备文档信息
String fileName = Paths.get(filePath).getFileName().toString();
String fileMd5 = calculateMD5(filePath);
String fileSize = String.valueOf(new File(filePath).length());
// 申请上传租约
ApplyFileUploadLeaseRequest leaseRequest = new ApplyFileUploadLeaseRequest()
.setFileName(fileName)
.setMd5(fileMd5)
.setSizeInBytes(fileSize);
ApplyFileUploadLeaseResponse leaseResponse = client.applyFileUploadLeaseWithOptions(
"default", workspaceId, leaseRequest, new HashMap<>(), new RuntimeOptions());
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
// 上传文件
ObjectMapper mapper = new ObjectMapper();
Map<String, String> headers = mapper.readValue(
mapper.writeValueAsString(leaseResponse.getBody().getData().getParam().getHeaders()),
Map.class);
uploadFile(uploadUrl, headers, filePath);
// 添加文件到类目
AddFileRequest addRequest = new AddFileRequest()
.setLeaseId(leaseId)
.setParser("DASHSCOPE_DOCMIND")
.setCategoryId("default");
AddFileResponse addResponse = client.addFileWithOptions(
workspaceId, addRequest, new HashMap<>(), new RuntimeOptions());
String fileId = addResponse.getBody().getData().getFileId();
// 等待文件解析完成
waitForFileParsing(client, workspaceId, fileId);
// 添加到知识库
SubmitIndexAddDocumentsJobRequest indexRequest = new SubmitIndexAddDocumentsJobRequest()
.setIndexId(indexId)
.setDocumentIds(Collections.singletonList(fileId))
.setSourceType("DATA_CENTER_FILE");
SubmitIndexAddDocumentsJobResponse indexResponse = client.submitIndexAddDocumentsJobWithOptions(
workspaceId, indexRequest, new HashMap<>(), new RuntimeOptions());
// 等待索引完成
waitForIndexJob(client, workspaceId, indexResponse.getBody().getData().getId(), indexId);
return fileId;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
private static String calculateMD5(String filePath) throws Exception {
MessageDigest md = MessageDigest.getInstance("MD5");
try (FileInputStream fis = new FileInputStream(filePath)) {
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
md.update(buffer, 0, bytesRead);
}
}
StringBuilder sb = new StringBuilder();
for (byte b : md.digest()) {
sb.append(String.format("%02x", b & 0xff));
}
return sb.toString();
}
private static void uploadFile(String preSignedUrl, Map<String, String> headers,
String filePath) throws Exception {
try (FileInputStream fis = new FileInputStream(filePath)) {
HttpURLConnection conn = (HttpURLConnection) new URL(preSignedUrl).openConnection();
conn.setRequestMethod("PUT");
conn.setDoOutput(true);
conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
conn.getOutputStream().write(buffer, 0, bytesRead);
}
if (conn.getResponseCode() != 200) {
throw new RuntimeException("上传失败: " + conn.getResponseCode());
}
}
}
private static void waitForFileParsing(com.aliyun.bailian20231229.Client client,
String workspaceId, String fileId) throws Exception {
while (true) {
DescribeFileResponse response = client.describeFileWithOptions(
workspaceId, fileId, new HashMap<>(), new RuntimeOptions());
String status = response.getBody().getData().getStatus();
if ("PARSE_SUCCESS".equals(status)) break;
if ("PARSE_FAILED".equals(status)) throw new RuntimeException("文档解析失败");
Thread.sleep(5000);
}
}
private static void waitForIndexJob(com.aliyun.bailian20231229.Client client,
String workspaceId, String jobId, String indexId) throws Exception {
while (true) {
GetIndexJobStatusRequest request = new GetIndexJobStatusRequest()
.setIndexId(indexId)
.setJobId(jobId);
GetIndexJobStatusResponse response = client.getIndexJobStatusWithOptions(
workspaceId, request, new HashMap<>(), new RuntimeOptions());
String status = response.getBody().getData().getStatus();
if ("COMPLETED".equals(status)) break;
if ("FAILED".equals(status)) throw new RuntimeException("索引任务失败");
Thread.sleep(5000);
}
}
private static String calculateMD5(InputStream inputStream) throws Exception {
MessageDigest md = MessageDigest.getInstance("MD5");
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = inputStream.read(buffer)) != -1) {
md.update(buffer, 0, bytesRead);
}
StringBuilder sb = new StringBuilder();
for (byte b : md.digest()) {
sb.append(String.format("%02x", b & 0xff));
}
return sb.toString();
}
/**
 * Uploads the content of a multipart file to a pre-signed URL with an HTTP {@code PUT}.
 *
 * <p>Only the {@code X-bailian-extra} and {@code Content-Type} entries of {@code headers}
 * are forwarded as request headers. The request body is copied in 4 KiB chunks. Both
 * streams are closed before the response code is read, and the connection is always
 * disconnected afterwards, whether or not the upload succeeded.
 *
 * @param preSignedUrl target URL of the upload
 * @param headers      header values to forward (see above for the keys that are used)
 * @param file         file whose bytes form the request body
 * @throws RuntimeException if the server answers with a status other than 200
 * @throws Exception        if opening the connection or transferring the data fails
 */
private static void uploadFile(String preSignedUrl, Map<String, String> headers,
        MultipartFile file) throws Exception {
    HttpURLConnection conn = (HttpURLConnection) new URL(preSignedUrl).openConnection();
    try {
        conn.setRequestMethod("PUT");
        conn.setDoOutput(true);
        conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
        conn.setRequestProperty("Content-Type", headers.get("Content-Type"));

        // Obtain the output stream once and close it deterministically; the original
        // re-fetched it on every chunk and never closed it.
        try (InputStream inputStream = file.getInputStream();
                java.io.OutputStream outputStream = conn.getOutputStream()) {
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
        }

        int responseCode = conn.getResponseCode();
        if (responseCode != 200) {
            throw new RuntimeException("上传失败: " + responseCode);
        }
    } finally {
        // Release the underlying socket even when the upload failed part-way.
        conn.disconnect();
    }
}
/**
 * Creates a Bailian SDK client from an AccessKey pair.
 *
 * <p>The client is configured with the fixed endpoint
 * {@code bailian.cn-beijing.aliyuncs.com}.
 *
 * @param accessKeyId     AccessKey ID placed into the client configuration
 * @param accessKeySecret AccessKey secret placed into the client configuration
 * @return a client built from that configuration
 * @throws Exception if the SDK client constructor fails
 */
public static com.aliyun.bailian20231229.Client createClient(String accessKeyId, String accessKeySecret) throws Exception {
    com.aliyun.teaopenapi.models.Config clientConfig = new com.aliyun.teaopenapi.models.Config();
    clientConfig.setAccessKeyId(accessKeyId);
    clientConfig.setAccessKeySecret(accessKeySecret);
    // Public-cloud, public-network endpoint used as the example; swap in another endpoint if needed.
    clientConfig.endpoint = "bailian.cn-beijing.aliyuncs.com";
    return new com.aliyun.bailian20231229.Client(clientConfig);
}
/**
 * Manual entry point that uploads one local document to a knowledge base.
 *
 * <p>Credentials are never embedded in source: the AccessKey pair is read from the
 * {@code ALIBABA_CLOUD_ACCESS_KEY_ID} and {@code ALIBABA_CLOUD_ACCESS_KEY_SECRET}
 * environment variables, and the run is aborted if either is missing.
 *
 * @param args optional overrides, in order: workspace ID, knowledge-base (index) ID,
 *             local file path; any argument that is omitted keeps its built-in default
 * @throws IllegalStateException if one of the credential environment variables is not set
 * @throws Exception             if client creation fails
 */
public static void main(String[] args) throws Exception {
    String accessKeyId = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
    String accessKeySecret = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    if (accessKeyId == null || accessKeyId.isEmpty()
            || accessKeySecret == null || accessKeySecret.isEmpty()) {
        throw new IllegalStateException(
                "请先设置环境变量 ALIBABA_CLOUD_ACCESS_KEY_ID 和 ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    }

    // Non-secret identifiers keep their previous values unless overridden on the command line.
    String workspaceId = args.length > 0 ? args[0] : "llm-4pf5auwewoz34zqu";
    String indexId = args.length > 1 ? args[1] : "b9pvwfqp3d";
    String filePath = args.length > 2 ? args[2] : "D:\\公司经济责任审计方案模板.docx";

    Client client = createClient(accessKeyId, accessKeySecret);
    uploadDocument(client, workspaceId, indexId, filePath);
}
}
//package com.gxwebsoft.ai.util;
//
//import com.aliyun.bailian20231229.Client;
//import com.aliyun.bailian20231229.models.*;
//import com.aliyun.teautil.models.RuntimeOptions;
//import com.fasterxml.jackson.databind.ObjectMapper;
//
//import java.io.File;
//import java.io.FileInputStream;
//import java.io.InputStream;
//import java.net.HttpURLConnection;
//import java.net.URL;
//import java.nio.file.Paths;
//import java.security.MessageDigest;
//import java.util.*;
//
//import org.springframework.web.multipart.MultipartFile;
//
///**
// * 知识库上传工具类
// * @author GIIT-YC
// *
// */
//public class KnowledgeBaseUploader {
//
// /**
// * 上传文档到知识库直接处理MultipartFile
// *
// * @param client 阿里云客户端
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @param file 上传的文件
// * @return 新文档的FileID失败返回null
// */
// public static String uploadDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, MultipartFile file) {
// try {
// // 准备文档信息
// String fileName = file.getOriginalFilename();
// String fileMd5 = calculateMD5(file.getInputStream());
// String fileSize = String.valueOf(file.getSize());
//
// // 申请上传租约
// ApplyFileUploadLeaseRequest leaseRequest = new ApplyFileUploadLeaseRequest()
// .setFileName(fileName)
// .setMd5(fileMd5)
// .setSizeInBytes(fileSize);
//
// ApplyFileUploadLeaseResponse leaseResponse = client.applyFileUploadLeaseWithOptions(
// "default", workspaceId, leaseRequest, new HashMap<>(), new RuntimeOptions());
//
// String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
// String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
//
// // 上传文件
// ObjectMapper mapper = new ObjectMapper();
// Map<String, String> headers = mapper.readValue(mapper.writeValueAsString(leaseResponse.getBody().getData().getParam().getHeaders()), Map.class);
//
// uploadFile(uploadUrl, headers, file);
//
// // 添加文件到类目
// AddFileRequest addRequest = new AddFileRequest()
// .setLeaseId(leaseId)
// .setParser("DASHSCOPE_DOCMIND")
// .setCategoryId("default");
//
// AddFileResponse addResponse = client.addFileWithOptions(workspaceId, addRequest, new HashMap<>(), new RuntimeOptions());
//
// String fileId = addResponse.getBody().getData().getFileId();
//
// // 等待文件解析完成
// waitForFileParsing(client, workspaceId, fileId);
//
// // 添加到知识库
// SubmitIndexAddDocumentsJobRequest indexRequest = new SubmitIndexAddDocumentsJobRequest()
// .setIndexId(indexId)
// .setDocumentIds(Collections.singletonList(fileId))
// .setSourceType("DATA_CENTER_FILE");
//
// SubmitIndexAddDocumentsJobResponse indexResponse = client.submitIndexAddDocumentsJobWithOptions(workspaceId, indexRequest, new HashMap<>(), new RuntimeOptions());
//
// // 等待索引完成
// waitForIndexJob(client, workspaceId, indexResponse.getBody().getData().getId(), indexId);
//
// return fileId;
//
// } catch (Exception e) {
// e.printStackTrace();
// return null;
// }
// }
//
// /**
// * 批量上传文档到知识库
// */
// public static List<String> uploadDocuments(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, MultipartFile[] files) {
// List<String> fileIds = new ArrayList<>();
// for (MultipartFile file : files) {
// String fileId = uploadDocument(client, workspaceId, indexId, file);
// if (fileId != null) {
// fileIds.add(fileId);
// }
// }
// return fileIds;
// }
//
// /**
// * 上传文档到知识库
// *
// * @param client 阿里云客户端
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @param filePath 文档本地路径
// * @return 新文档的FileID失败返回null
// */
// public static String uploadDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String filePath) {
// try {
// // 准备文档信息
// String fileName = Paths.get(filePath).getFileName().toString();
// String fileMd5 = calculateMD5(filePath);
// String fileSize = String.valueOf(new File(filePath).length());
//
// // 申请上传租约
// ApplyFileUploadLeaseRequest leaseRequest = new ApplyFileUploadLeaseRequest()
// .setFileName(fileName)
// .setMd5(fileMd5)
// .setSizeInBytes(fileSize);
//
// ApplyFileUploadLeaseResponse leaseResponse = client.applyFileUploadLeaseWithOptions(
// "default", workspaceId, leaseRequest, new HashMap<>(), new RuntimeOptions());
//
// String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
// String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
//
// // 上传文件
// ObjectMapper mapper = new ObjectMapper();
// Map<String, String> headers = mapper.readValue(
// mapper.writeValueAsString(leaseResponse.getBody().getData().getParam().getHeaders()),
// Map.class);
//
// uploadFile(uploadUrl, headers, filePath);
//
// // 添加文件到类目
// AddFileRequest addRequest = new AddFileRequest()
// .setLeaseId(leaseId)
// .setParser("DASHSCOPE_DOCMIND")
// .setCategoryId("default");
//
// AddFileResponse addResponse = client.addFileWithOptions(
// workspaceId, addRequest, new HashMap<>(), new RuntimeOptions());
//
// String fileId = addResponse.getBody().getData().getFileId();
//
// // 等待文件解析完成
// waitForFileParsing(client, workspaceId, fileId);
//
// // 添加到知识库
// SubmitIndexAddDocumentsJobRequest indexRequest = new SubmitIndexAddDocumentsJobRequest()
// .setIndexId(indexId)
// .setDocumentIds(Collections.singletonList(fileId))
// .setSourceType("DATA_CENTER_FILE");
//
// SubmitIndexAddDocumentsJobResponse indexResponse = client.submitIndexAddDocumentsJobWithOptions(
// workspaceId, indexRequest, new HashMap<>(), new RuntimeOptions());
//
// // 等待索引完成
// waitForIndexJob(client, workspaceId, indexResponse.getBody().getData().getId(), indexId);
//
// return fileId;
//
// } catch (Exception e) {
// e.printStackTrace();
// return null;
// }
// }
//
// private static String calculateMD5(String filePath) throws Exception {
// MessageDigest md = MessageDigest.getInstance("MD5");
// try (FileInputStream fis = new FileInputStream(filePath)) {
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = fis.read(buffer)) != -1) {
// md.update(buffer, 0, bytesRead);
// }
// }
// StringBuilder sb = new StringBuilder();
// for (byte b : md.digest()) {
// sb.append(String.format("%02x", b & 0xff));
// }
// return sb.toString();
// }
//
// private static void uploadFile(String preSignedUrl, Map<String, String> headers,
// String filePath) throws Exception {
// try (FileInputStream fis = new FileInputStream(filePath)) {
// HttpURLConnection conn = (HttpURLConnection) new URL(preSignedUrl).openConnection();
// conn.setRequestMethod("PUT");
// conn.setDoOutput(true);
// conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
// conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
//
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = fis.read(buffer)) != -1) {
// conn.getOutputStream().write(buffer, 0, bytesRead);
// }
//
// if (conn.getResponseCode() != 200) {
// throw new RuntimeException("上传失败: " + conn.getResponseCode());
// }
// }
// }
//
// private static void waitForFileParsing(com.aliyun.bailian20231229.Client client,
// String workspaceId, String fileId) throws Exception {
// while (true) {
// DescribeFileResponse response = client.describeFileWithOptions(
// workspaceId, fileId, new HashMap<>(), new RuntimeOptions());
//
// String status = response.getBody().getData().getStatus();
// if ("PARSE_SUCCESS".equals(status)) break;
// if ("PARSE_FAILED".equals(status)) throw new RuntimeException("文档解析失败");
// Thread.sleep(5000);
// }
// }
//
// private static void waitForIndexJob(com.aliyun.bailian20231229.Client client,
// String workspaceId, String jobId, String indexId) throws Exception {
// while (true) {
// GetIndexJobStatusRequest request = new GetIndexJobStatusRequest()
// .setIndexId(indexId)
// .setJobId(jobId);
//
// GetIndexJobStatusResponse response = client.getIndexJobStatusWithOptions(
// workspaceId, request, new HashMap<>(), new RuntimeOptions());
//
// String status = response.getBody().getData().getStatus();
// if ("COMPLETED".equals(status)) break;
// if ("FAILED".equals(status)) throw new RuntimeException("索引任务失败");
// Thread.sleep(5000);
// }
// }
//
// private static String calculateMD5(InputStream inputStream) throws Exception {
// MessageDigest md = MessageDigest.getInstance("MD5");
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = inputStream.read(buffer)) != -1) {
// md.update(buffer, 0, bytesRead);
// }
// StringBuilder sb = new StringBuilder();
// for (byte b : md.digest()) {
// sb.append(String.format("%02x", b & 0xff));
// }
// return sb.toString();
// }
//
// private static void uploadFile(String preSignedUrl, Map<String, String> headers,
// MultipartFile file) throws Exception {
// HttpURLConnection conn = (HttpURLConnection) new URL(preSignedUrl).openConnection();
// conn.setRequestMethod("PUT");
// conn.setDoOutput(true);
// conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
// conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
//
// try (InputStream inputStream = file.getInputStream()) {
// byte[] buffer = new byte[4096];
// int bytesRead;
// while ((bytesRead = inputStream.read(buffer)) != -1) {
// conn.getOutputStream().write(buffer, 0, bytesRead);
// }
// }
//
// if (conn.getResponseCode() != 200) {
// throw new RuntimeException("上传失败: " + conn.getResponseCode());
// }
// }
//
// /**
// * 初始化客户端Client
// *
// * @return 配置好的客户端对象
// */
// public static com.aliyun.bailian20231229.Client createClient(String accessKeyId, String accessKeySecret) throws Exception {
// com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
// .setAccessKeyId(accessKeyId)
// .setAccessKeySecret(accessKeySecret);
// // 下方接入地址以公有云的公网接入地址为例,可按需更换接入地址。
// config.endpoint = "bailian.cn-beijing.aliyuncs.com";
// return new com.aliyun.bailian20231229.Client(config);
// }
//
// public static void main(String[] args) throws Exception {
// String ALIBABA_CLOUD_ACCESS_KEY_ID = "LTAI5tD5YRKuxWz6Eg7qrM4P";
// String ALIBABA_CLOUD_ACCESS_KEY_SECRET = "bO8TBDXflOwbtSKimPpG8XrJnyzgTk";
// String WORKSPACE_ID = "llm-4pf5auwewoz34zqu";
// String indexId = "b9pvwfqp3d";
// String filePath = "D:\\公司经济责任审计方案模板.docx";
//
// Client client = createClient(ALIBABA_CLOUD_ACCESS_KEY_ID, ALIBABA_CLOUD_ACCESS_KEY_SECRET);
//
// uploadDocument(client, WORKSPACE_ID, indexId, filePath);
// }
//}

View File

@@ -1,156 +1,156 @@
package com.gxwebsoft.ai.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSON;
import com.aliyun.bailian20231229.models.CreateIndexResponse;
import com.aliyun.bailian20231229.models.DeleteFileResponse;
import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
import com.aliyun.bailian20231229.models.DeleteIndexResponse;
import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse;
import com.aliyun.bailian20231229.models.ListIndicesResponse;
import com.aliyun.bailian20231229.models.RetrieveRequest;
import com.aliyun.bailian20231229.models.RetrieveResponse;
import com.aliyun.teautil.models.RuntimeOptions;
/**
* 知识库工具类
* @author GIIT-YC
*
*/
public class KnowledgeBaseUtil {
/**
* 在指定的知识库中检索信息。
*
* @param client 客户端对象bailian20231229Client
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param query 检索查询语句
* @return 阿里云百炼服务的响应
*/
public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query) throws Exception {
RetrieveRequest retrieveRequest = new RetrieveRequest();
retrieveRequest.setIndexId(indexId);
retrieveRequest.setQuery(query);
retrieveRequest.setDenseSimilarityTopK(100);
retrieveRequest.setSparseSimilarityTopK(100);
retrieveRequest.setEnableReranking(false);//开启耗费巨量token
RuntimeOptions runtime = new RuntimeOptions();
return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
}
public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query, List<String> filesIds) throws Exception {
RetrieveRequest retrieveRequest = new RetrieveRequest();
retrieveRequest.setIndexId(indexId);
retrieveRequest.setQuery(query);
retrieveRequest.setDenseSimilarityTopK(100);
retrieveRequest.setSparseSimilarityTopK(100);
retrieveRequest.setEnableReranking(false);//开启耗费巨量token
List<Map<String, String>> searchFilters = new ArrayList<>();
Map<String, String> searchFiltersTags = new HashMap<>();
searchFiltersTags.put("tags", JSON.toJSONString(filesIds));
searchFilters.add(searchFiltersTags);
retrieveRequest.setSearchFilters(searchFilters);
RuntimeOptions runtime = new RuntimeOptions();
return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
}
/**
* 在阿里云百炼服务中创建知识库(初始化)。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param name 知识库名称
* @param desc 知识库描述
* @return 阿里云百炼服务的响应对象
*/
public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String name, String desc) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest();
createIndexRequest.setStructureType("unstructured");
createIndexRequest.setName(name);
createIndexRequest.setDescription(desc);
createIndexRequest.setSinkType("DEFAULT");
createIndexRequest.setEmbeddingModelName("text-embedding-v4");
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime);
}
/**
* 获取指定业务空间下一个或多个知识库的详细信息
*
* @param client 客户端Client
* @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应
*/
public static ListIndicesResponse listIndices(com.aliyun.bailian20231229.Client client, String workspaceId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.ListIndicesRequest listIndicesRequest = new com.aliyun.bailian20231229.models.ListIndicesRequest();
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.listIndicesWithOptions(workspaceId, listIndicesRequest, headers, runtime);
}
/**
* 永久性删除指定的知识库
*
* @param client 客户端Client
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @return 阿里云百炼服务的响应
*/
public static DeleteIndexResponse deleteIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId) throws Exception {
Map<String, String> headers = new HashMap<>();
com.aliyun.bailian20231229.models.DeleteIndexRequest deleteIndexRequest = new com.aliyun.bailian20231229.models.DeleteIndexRequest();
deleteIndexRequest.setIndexId(indexId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.deleteIndexWithOptions(workspaceId, deleteIndexRequest, headers, runtime);
}
/**
* 查询知识库下的文档列表
*
* @param client 客户端Client
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @return 阿里云百炼服务的响应
*/
public static ListIndexDocumentsResponse listIndexDocuments(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, Integer pageSize, Integer pageNumber) throws Exception {
com.aliyun.bailian20231229.models.ListIndexDocumentsRequest listIndexDocumentsRequest = new com.aliyun.bailian20231229.models.ListIndexDocumentsRequest();
listIndexDocumentsRequest.setIndexId(indexId);
listIndexDocumentsRequest.setPageSize(pageSize);
listIndexDocumentsRequest.setPageNumber(pageNumber);
return client.listIndexDocuments(workspaceId, listIndexDocumentsRequest);
}
/**
* 删除知识库下的文档
*
* @param client 客户端Client
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param ids 删除文件ID列表
* @return 阿里云百炼服务的响应
*/
public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, List<String> ids) throws Exception {
com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest deleteIndexDocumentRequest = new com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest();
deleteIndexDocumentRequest.setIndexId(indexId);
deleteIndexDocumentRequest.setDocumentIds(ids);
return client.deleteIndexDocument(workspaceId, deleteIndexDocumentRequest);
}
/**
* 删除阿里云应用数据文档
*
* @param client 客户端Client
* @param workspaceId 业务空间ID
* @param fileId 删除文件ID
* @return 阿里云百炼服务的响应
*/
public static DeleteFileResponse deleteAppDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId) throws Exception {
return client.deleteFile(fileId, workspaceId);
}
}
//package com.gxwebsoft.ai.util;
//
//import java.util.ArrayList;
//import java.util.HashMap;
//import java.util.List;
//import java.util.Map;
//
//import com.alibaba.fastjson.JSON;
//import com.aliyun.bailian20231229.models.CreateIndexResponse;
//import com.aliyun.bailian20231229.models.DeleteFileResponse;
//import com.aliyun.bailian20231229.models.DeleteIndexDocumentResponse;
//import com.aliyun.bailian20231229.models.DeleteIndexResponse;
//import com.aliyun.bailian20231229.models.ListIndexDocumentsResponse;
//import com.aliyun.bailian20231229.models.ListIndicesResponse;
//import com.aliyun.bailian20231229.models.RetrieveRequest;
//import com.aliyun.bailian20231229.models.RetrieveResponse;
//import com.aliyun.teautil.models.RuntimeOptions;
//
///**
// * 知识库工具类
// * @author GIIT-YC
// *
// */
//public class KnowledgeBaseUtil {
//
// /**
// * 在指定的知识库中检索信息。
// *
// * @param client 客户端对象bailian20231229Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @param query 检索查询语句
// * @return 阿里云百炼服务的响应
// */
// public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query) throws Exception {
// RetrieveRequest retrieveRequest = new RetrieveRequest();
// retrieveRequest.setIndexId(indexId);
// retrieveRequest.setQuery(query);
// retrieveRequest.setDenseSimilarityTopK(100);
// retrieveRequest.setSparseSimilarityTopK(100);
// retrieveRequest.setEnableReranking(false);//开启耗费巨量token
// RuntimeOptions runtime = new RuntimeOptions();
// return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
// }
//
// public static RetrieveResponse retrieveIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String query, List<String> filesIds) throws Exception {
// RetrieveRequest retrieveRequest = new RetrieveRequest();
// retrieveRequest.setIndexId(indexId);
// retrieveRequest.setQuery(query);
// retrieveRequest.setDenseSimilarityTopK(100);
// retrieveRequest.setSparseSimilarityTopK(100);
// retrieveRequest.setEnableReranking(false);//开启耗费巨量token
// List<Map<String, String>> searchFilters = new ArrayList<>();
// Map<String, String> searchFiltersTags = new HashMap<>();
// searchFiltersTags.put("tags", JSON.toJSONString(filesIds));
// searchFilters.add(searchFiltersTags);
// retrieveRequest.setSearchFilters(searchFilters);
// RuntimeOptions runtime = new RuntimeOptions();
// return client.retrieveWithOptions(workspaceId, retrieveRequest, null, runtime);
// }
//
// /**
// * 在阿里云百炼服务中创建知识库(初始化)。
// *
// * @param client 客户端对象
// * @param workspaceId 业务空间ID
// * @param name 知识库名称
// * @param desc 知识库描述
// * @return 阿里云百炼服务的响应对象
// */
// public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String name, String desc) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest();
// createIndexRequest.setStructureType("unstructured");
// createIndexRequest.setName(name);
// createIndexRequest.setDescription(desc);
// createIndexRequest.setSinkType("DEFAULT");
// createIndexRequest.setEmbeddingModelName("text-embedding-v4");
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime);
// }
//
// /**
// * 获取指定业务空间下一个或多个知识库的详细信息
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @return 阿里云百炼服务的响应
// */
// public static ListIndicesResponse listIndices(com.aliyun.bailian20231229.Client client, String workspaceId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.ListIndicesRequest listIndicesRequest = new com.aliyun.bailian20231229.models.ListIndicesRequest();
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.listIndicesWithOptions(workspaceId, listIndicesRequest, headers, runtime);
// }
//
// /**
// * 永久性删除指定的知识库
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @return 阿里云百炼服务的响应
// */
// public static DeleteIndexResponse deleteIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId) throws Exception {
// Map<String, String> headers = new HashMap<>();
// com.aliyun.bailian20231229.models.DeleteIndexRequest deleteIndexRequest = new com.aliyun.bailian20231229.models.DeleteIndexRequest();
// deleteIndexRequest.setIndexId(indexId);
// com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
// return client.deleteIndexWithOptions(workspaceId, deleteIndexRequest, headers, runtime);
// }
//
// /**
// * 查询知识库下的文档列表
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @return 阿里云百炼服务的响应
// */
// public static ListIndexDocumentsResponse listIndexDocuments(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, Integer pageSize, Integer pageNumber) throws Exception {
// com.aliyun.bailian20231229.models.ListIndexDocumentsRequest listIndexDocumentsRequest = new com.aliyun.bailian20231229.models.ListIndexDocumentsRequest();
// listIndexDocumentsRequest.setIndexId(indexId);
// listIndexDocumentsRequest.setPageSize(pageSize);
// listIndexDocumentsRequest.setPageNumber(pageNumber);
// return client.listIndexDocuments(workspaceId, listIndexDocumentsRequest);
// }
//
// /**
// * 删除知识库下的文档
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @param indexId 知识库ID
// * @param ids 删除文件ID列表
// * @return 阿里云百炼服务的响应
// */
// public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, List<String> ids) throws Exception {
// com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest deleteIndexDocumentRequest = new com.aliyun.bailian20231229.models.DeleteIndexDocumentRequest();
// deleteIndexDocumentRequest.setIndexId(indexId);
// deleteIndexDocumentRequest.setDocumentIds(ids);
// return client.deleteIndexDocument(workspaceId, deleteIndexDocumentRequest);
// }
//
// /**
// * 删除阿里云应用数据文档
// *
// * @param client 客户端Client
// * @param workspaceId 业务空间ID
// * @param fileId 删除文件ID
// * @return 阿里云百炼服务的响应
// */
// public static DeleteFileResponse deleteAppDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId) throws Exception {
// return client.deleteFile(fileId, workspaceId);
// }
//}