// useWbwStreamProcessor.ts
import { useCallback, useMemo, useState } from "react";
import { useIntl } from "react-intl";

import type { IWbw } from "../types/wbw";
import {
  paliEndingGrammar,
  paliEndingType,
} from "../components/general/PaliEnding";
  8. // 类型定义
  9. export interface WbwElement<T> {
  10. value: T;
  11. status: number;
  12. }
  13. interface StreamController {
  14. addData: (jsonlLine: string) => void;
  15. complete: () => void;
  16. }
  17. interface ProcessWbwStreamOptions {
  18. modelId: string;
  19. data: IWbw[];
  20. endingType: string[];
  21. endingGrammar: string[];
  22. onProgress?: (data: IWbw[], isComplete: boolean) => void;
  23. onComplete?: (finalData: IWbw[]) => void;
  24. onError?: (error: string) => void;
  25. }
  26. /**
  27. * 处理JSONL流式输出的函数
  28. */
  29. export const processWbwStream = async ({
  30. modelId,
  31. data,
  32. endingType,
  33. endingGrammar,
  34. onProgress,
  35. onComplete,
  36. onError,
  37. }: ProcessWbwStreamOptions): Promise<{
  38. success: boolean;
  39. data?: IWbw[];
  40. error?: string;
  41. }> => {
  42. if (typeof import.meta.env.REACT_APP_OPENAI_PROXY === "undefined") {
  43. console.error("no REACT_APP_OPENAI_PROXY");
  44. const error = "API配置错误";
  45. onError?.(error);
  46. return { success: false, error };
  47. }
  48. const sys_prompt = `
  49. 你是一个巴利语专家。用户提供的jsonl 数据 是巴利文句子的全部单词
  50. 请根据每个单词的拼写 real.value 填写如下字段
  51. 巴利单词的词典原型:parent.value
  52. 单词的中文意思:meaning.value
  53. 巴利单词的拆分:factors.value
  54. 语尾请加[]
  55. 拆分后每个组成部分的中文意思factorMeaning.value
  56. 请按照下表填写巴利语单词的类型 type.value
  57. \`\`\`csv
  58. ${endingType.join("\n")}
  59. \`\`\`
  60. 请按照下表填写巴利语单词的语法信息 grammar.value
  61. 名词和形容词填写 性,数,格
  62. 动词填写 人称,数,时态语气
  63. 用 $ 作为分隔符
  64. \`\`\`csv
  65. ${endingGrammar.join("\n")}
  66. \`\`\`
  67. 直接输出JSONL格式数据
  68. `;
  69. const jsonl = data.map((obj) => JSON.stringify(obj)).join("\n");
  70. const prompt = `
  71. \`\`\`jsonl
  72. ${jsonl}
  73. \`\`\`
  74. `;
  75. console.debug("ai wbw system prompt", sys_prompt, prompt);
  76. try {
  77. const payload = {
  78. model: "grok-3", // 或者从models数组中获取实际模型名称
  79. messages: [
  80. {
  81. role: "system",
  82. content: sys_prompt,
  83. },
  84. { role: "user", content: prompt },
  85. ],
  86. stream: true,
  87. temperature: 0.3,
  88. max_tokens: 4000,
  89. };
  90. const url = import.meta.env.REACT_APP_OPENAI_PROXY;
  91. const requestData = {
  92. model_id: modelId,
  93. payload: payload,
  94. };
  95. console.info("api request", url, requestData);
  96. const response = await fetch(url, {
  97. method: "POST",
  98. headers: {
  99. "Content-Type": "application/json",
  100. Authorization: `Bearer AIzaSyCzr8KqEdaQ3cRCxsFwSHh8c7kF3RZTZWw`,
  101. },
  102. body: JSON.stringify(requestData),
  103. });
  104. if (!response.ok) {
  105. throw new Error(`HTTP error! status: ${response.status}`);
  106. }
  107. const reader = response.body?.getReader();
  108. if (!reader) {
  109. throw new Error("无法获取响应流");
  110. }
  111. const decoder = new TextDecoder();
  112. let buffer = "";
  113. let jsonlBuffer = ""; // 用于累积JSONL内容
  114. const resultData: IWbw[] = [];
  115. // 创建流控制器
  116. const streamController: StreamController = {
  117. addData: (jsonlLine: string) => {
  118. try {
  119. // 解析JSONL行
  120. const parsedData = JSON.parse(jsonlLine.trim());
  121. console.info("ai wbw stream ok", parsedData);
  122. // 转换为IWbw格式
  123. const wbwData: IWbw = {
  124. book: parsedData.book || 0,
  125. para: parsedData.para || 0,
  126. sn: parsedData.sn || [],
  127. word: parsedData.word || { value: "", status: 0 },
  128. real: parsedData.real || { value: null, status: 0 },
  129. meaning: parsedData.meaning,
  130. type: parsedData.type,
  131. grammar: parsedData.grammar,
  132. style: parsedData.style,
  133. case: parsedData.case,
  134. parent: parsedData.parent,
  135. parent2: parsedData.parent2,
  136. grammar2: parsedData.grammar2,
  137. factors: parsedData.factors,
  138. factorMeaning: parsedData.factorMeaning,
  139. relation: parsedData.relation,
  140. note: parsedData.note,
  141. bookMarkColor: parsedData.bookMarkColor,
  142. bookMarkText: parsedData.bookMarkText,
  143. locked: parsedData.locked || false,
  144. confidence: parsedData.confidence || 0.5,
  145. attachments: parsedData.attachments,
  146. hasComment: parsedData.hasComment,
  147. grammarId: parsedData.grammarId,
  148. bookName: parsedData.bookName,
  149. editor: parsedData.editor,
  150. created_at: parsedData.created_at,
  151. updated_at: parsedData.updated_at,
  152. };
  153. resultData.push(wbwData);
  154. // 调用进度回调
  155. onProgress?.(resultData, false);
  156. } catch (e) {
  157. console.warn("解析JSONL行失败:", e, "内容:", jsonlLine);
  158. }
  159. },
  160. complete: () => {
  161. onProgress?.(resultData, true);
  162. onComplete?.(resultData);
  163. },
  164. };
  165. try {
  166. while (true) {
  167. const { done, value } = await reader.read();
  168. if (done) {
  169. // 处理最后的缓冲内容
  170. if (jsonlBuffer.trim()) {
  171. const lines = jsonlBuffer.trim().split("\n");
  172. for (const line of lines) {
  173. if (line.trim()) {
  174. streamController.addData(line);
  175. }
  176. }
  177. }
  178. streamController.complete();
  179. return { success: true, data: resultData };
  180. }
  181. buffer += decoder.decode(value, { stream: true });
  182. const lines = buffer.split("\n");
  183. buffer = lines.pop() || "";
  184. for (const line of lines) {
  185. if (line.trim() === "") continue;
  186. if (line.startsWith("data: ")) {
  187. const data = line.slice(6);
  188. if (data === "[DONE]") {
  189. // 处理剩余的JSONL内容
  190. if (jsonlBuffer.trim()) {
  191. const jsonlLines = jsonlBuffer.trim().split("\n");
  192. for (const jsonlLine of jsonlLines) {
  193. if (jsonlLine.trim()) {
  194. streamController.addData(jsonlLine);
  195. }
  196. }
  197. }
  198. streamController.complete();
  199. return { success: true, data: resultData };
  200. }
  201. try {
  202. const parsed = JSON.parse(data);
  203. const delta = parsed.choices?.[0]?.delta;
  204. if (delta?.content) {
  205. // 累积内容到JSONL缓冲区
  206. jsonlBuffer += delta.content;
  207. // 检查是否有完整的JSONL行
  208. const jsonlLines = jsonlBuffer.split("\n");
  209. // 保留最后一行(可能不完整)
  210. jsonlBuffer = jsonlLines.pop() || "";
  211. // 处理完整的行
  212. for (const jsonlLine of jsonlLines) {
  213. if (jsonlLine.trim()) {
  214. streamController.addData(jsonlLine);
  215. }
  216. }
  217. }
  218. } catch (e) {
  219. console.warn("解析SSE数据失败:", e);
  220. }
  221. }
  222. }
  223. }
  224. } catch (error) {
  225. console.error("读取流数据失败:", error);
  226. const errorMessage = "读取响应流失败";
  227. onError?.(errorMessage);
  228. return { success: false, error: errorMessage };
  229. }
  230. } catch (error) {
  231. console.error("API调用失败:", error);
  232. const errorMessage = "API调用失败,请重试";
  233. onError?.(errorMessage);
  234. return { success: false, error: errorMessage };
  235. }
  236. };
  237. /**
  238. * React Hook 用法示例
  239. */
  240. export const useWbwStreamProcessor = () => {
  241. const [isProcessing, setIsProcessing] = useState<boolean>(false);
  242. const [wbwData, setWbwData] = useState<IWbw[]>([]);
  243. const [error, setError] = useState<string>();
  244. const intl = useIntl(); // 在Hook中使用
  245. const endingType = paliEndingType.map((item) => {
  246. return (
  247. intl.formatMessage({ id: `dict.fields.type.${item}.label` }) +
  248. `:.${item}.`
  249. );
  250. });
  251. const endingGrammar = paliEndingGrammar.map((item) => {
  252. return (
  253. intl.formatMessage({ id: `dict.fields.type.${item}.label` }) +
  254. `:.${item}.`
  255. );
  256. });
  257. const processStream = useCallback(
  258. async (modelId: string, data: IWbw[]) => {
  259. setIsProcessing(true);
  260. setWbwData([]);
  261. setError(undefined);
  262. const result = await processWbwStream({
  263. modelId,
  264. data,
  265. endingType,
  266. endingGrammar,
  267. onProgress: (data) => {
  268. console.info("onProgress", data);
  269. setWbwData([...data]); // 创建新数组触发重渲染
  270. },
  271. onComplete: (finalData) => {
  272. setWbwData(finalData);
  273. setIsProcessing(false);
  274. },
  275. onError: (errorMessage) => {
  276. setError(errorMessage);
  277. setIsProcessing(false);
  278. },
  279. });
  280. if (!result.success) {
  281. setError(result.error || "处理失败");
  282. setIsProcessing(false);
  283. }
  284. return result;
  285. },
  286. [endingGrammar, endingType]
  287. );
  288. return {
  289. processStream,
  290. isProcessing,
  291. wbwData,
  292. error,
  293. clearData: useCallback(() => {
  294. setWbwData([]);
  295. setError(undefined);
  296. }, []),
  297. };
  298. };