ExportDiscussion.php 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\DB;
  5. use App\Http\Api\ChannelApi;
  6. use Carbon\Carbon;
  7. class ExportDiscussion extends Command
  8. {
  9. /**
  10. * The name and signature of the console command.
  11. * php artisan export:discussion
  12. */
  13. protected $signature = 'export:discussion {editor : The editor UID to export discussions for}';
  14. /**
  15. * The console command description.
  16. */
  17. protected $description = 'Export discussions made by a specific editor to a Markdown file';
  18. /** @var string 巴利原文 channel_uid */
  19. private string $orgChannelId;
  20. /** @var resource 输出文件句柄(流式写入,避免大字符串堆积在内存) */
  21. private $fileHandle;
  22. /** @var string 输出文件路径 */
  23. private string $outputPath;
  24. /**
  25. * Execute the console command.
  26. */
  27. public function handle(): int
  28. {
  29. $editorUid = $this->argument('editor');
  30. $this->info("Fetching discussions for editor: {$editorUid}");
  31. // 1. 获取巴利原文 channel_uid
  32. $this->orgChannelId = ChannelApi::getSysChannel('_System_Pali_VRI_');
  33. if (!$this->orgChannelId) {
  34. $this->error('Failed to retrieve Pali source channel ID.');
  35. return self::FAILURE;
  36. }
  37. $this->info("Pali channel ID: {$this->orgChannelId}");
  38. // 2. 统计总数(用于进度条)
  39. $total = DB::table('discussions')
  40. ->where('editor_uid', $editorUid)
  41. ->where('status', 'active')
  42. ->count();
  43. if ($total === 0) {
  44. $this->warn("No discussions found for editor: {$editorUid}");
  45. return self::SUCCESS;
  46. }
  47. $this->info("Found {$total} discussion(s). Processing...");
  48. // 3. 打开文件句柄(流式写入,不在内存中拼接整个 Markdown)
  49. $filename = "discussion_export_{$editorUid}_" . now()->format('YmdHis') . '.md';
  50. $this->outputPath = storage_path("app/tmp/{$filename}");
  51. $this->fileHandle = fopen($this->outputPath, 'w');
  52. if (!$this->fileHandle) {
  53. $this->error("Cannot open file for writing: {$this->outputPath}");
  54. return self::FAILURE;
  55. }
  56. // 写文件头
  57. $this->writeLine("# 讨论导出报告\n");
  58. $this->writeLine("- **Editor UID**: {$editorUid}");
  59. $this->writeLine("- **导出时间**: " . now()->toDateTimeString());
  60. $this->writeLine("\n---\n");
  61. // 4. 分批处理(每批 50 条),避免内存溢出
  62. $progressBar = $this->output->createProgressBar($total);
  63. $progressBar->start();
  64. DB::table('discussions')
  65. ->where('editor_uid', $editorUid)
  66. ->where('status', 'active')
  67. ->orderBy('created_at', 'asc')
  68. ->select(['id', 'res_id', 'res_type', 'content', 'created_at'])
  69. ->chunk(50, function ($discussions) use ($progressBar) {
  70. $this->processChunk($discussions);
  71. $progressBar->advance($discussions->count());
  72. // 每批处理完后主动释放内存
  73. gc_collect_cycles();
  74. });
  75. $progressBar->finish();
  76. $this->newLine();
  77. fclose($this->fileHandle);
  78. $this->info("\n✅ 导出完成!文件已保存到: {$this->outputPath}");
  79. return self::SUCCESS;
  80. }
  81. /**
  82. * 处理一批 discussions。
  83. */
  84. private function processChunk(\Illuminate\Support\Collection $discussions): void
  85. {
  86. // --- 批量查译文 sentences ---
  87. $resIds = $discussions->pluck('res_id')->unique()->values()->all();
  88. $translationMap = DB::table('sentences')
  89. ->whereIn('uid', $resIds)
  90. ->select([
  91. 'uid',
  92. 'book_id',
  93. 'paragraph',
  94. 'word_start',
  95. 'word_end',
  96. 'content',
  97. 'channel_uid',
  98. ])
  99. ->get()
  100. ->keyBy('uid');
  101. // --- 批量查 sent_histories(分小批,避免超大 IN) ---
  102. $historiesMap = [];
  103. foreach (array_chunk($resIds, 100) as $batch) {
  104. DB::table('sent_histories')
  105. ->whereIn('sent_uid', $batch)
  106. ->orderBy('create_time', 'asc')
  107. ->select(['sent_uid', 'content', 'create_time'])
  108. ->each(function ($row) use (&$historiesMap) {
  109. $historiesMap[$row->sent_uid][] = $row;
  110. });
  111. }
  112. // --- 收集本批所有唯一坐标,批量查巴利原文 ---
  113. $coordKeys = [];
  114. foreach ($translationMap as $t) {
  115. $key = "{$t->book_id}_{$t->paragraph}_{$t->word_start}_{$t->word_end}";
  116. $coordKeys[$key] = $t;
  117. }
  118. $paliMap = $this->fetchPaliSentences($coordKeys);
  119. // --- 写 Markdown ---
  120. foreach ($discussions as $discussion) {
  121. $sentUid = $discussion->res_id;
  122. $translation = $translationMap->get($sentUid);
  123. if (!$translation) {
  124. continue;
  125. }
  126. $coordKey = "{$translation->book_id}_{$translation->paragraph}_{$translation->word_start}_{$translation->word_end}";
  127. $pali = $paliMap[$coordKey] ?? null;
  128. $paliContent = $pali ? trim($pali->content ?? '(无原文)') : '(未找到巴利原文)';
  129. $discussionCreatedAt = $discussion->created_at
  130. ? Carbon::parse($discussion->created_at)
  131. : null;
  132. $histories = $historiesMap[$sentUid] ?? [];
  133. $matchedHistory = $this->findClosestHistory($histories, $discussionCreatedAt);
  134. $translationAtTime = $matchedHistory
  135. ? trim($matchedHistory->content)
  136. : trim($translation->content ?? '(无译文内容)');
  137. $this->writeLine("# {$paliContent}\n");
  138. $this->writeLine(" - **历史译文**: {$translationAtTime}");
  139. $this->writeLine(" - **评论**: " . trim($discussion->title ?? '') . trim($discussion->content ?? ''));
  140. $this->writeLine(" - **当前译文**: {$translation->content}");
  141. $this->writeLine('');
  142. }
  143. // 显式释放本批数据
  144. unset($translationMap, $historiesMap, $coordKeys, $paliMap);
  145. }
  146. /**
  147. * 批量查询巴利原文,每组最多 30 个坐标,避免超大 SQL。
  148. *
  149. * @param array<string, object> $coordKeys key="{book_id}_{paragraph}_{word_start}_{word_end}"
  150. * @return array<string, object>
  151. */
  152. private function fetchPaliSentences(array $coordKeys): array
  153. {
  154. $paliMap = [];
  155. foreach (array_chunk(array_values($coordKeys), 30) as $group) {
  156. $results = DB::table('sentences')
  157. ->where('channel_uid', $this->orgChannelId)
  158. ->where(function ($q) use ($group) {
  159. foreach ($group as $t) {
  160. $q->orWhere(function ($sub) use ($t) {
  161. $sub->where('book_id', $t->book_id)
  162. ->where('paragraph', $t->paragraph)
  163. ->where('word_start', $t->word_start)
  164. ->where('word_end', $t->word_end);
  165. });
  166. }
  167. })
  168. ->select(['book_id', 'paragraph', 'word_start', 'word_end', 'content'])
  169. ->get();
  170. foreach ($results as $ps) {
  171. $key = "{$ps->book_id}_{$ps->paragraph}_{$ps->word_start}_{$ps->word_end}";
  172. $paliMap[$key] = $ps;
  173. }
  174. unset($results);
  175. }
  176. return $paliMap;
  177. }
  178. /**
  179. * 流式写入一行到文件。
  180. */
  181. private function writeLine(string $line): void
  182. {
  183. fwrite($this->fileHandle, $line . "\n");
  184. }
  185. /**
  186. * 在历史记录中找评论发布时间之前最近的那条。
  187. * 若全部在评论之后,则退而取最早一条。
  188. *
  189. * @param array $histories sent_histories(已按 create_time ASC 排序)
  190. * @param Carbon|null $discussionCreatedAt 评论发布时间
  191. */
  192. private function findClosestHistory(array $histories, ?Carbon $discussionCreatedAt): ?object
  193. {
  194. if (empty($histories)) {
  195. return null;
  196. }
  197. if (!$discussionCreatedAt) {
  198. return end($histories) ?: null;
  199. }
  200. $discussionTimestamp = $discussionCreatedAt->timestamp;
  201. $best = null;
  202. $bestDiff = PHP_INT_MAX;
  203. foreach ($histories as $h) {
  204. $historyTime = (int) $h->create_time;
  205. if ($historyTime <= $discussionTimestamp) {
  206. $diff = $discussionTimestamp - $historyTime;
  207. if ($diff < $bestDiff) {
  208. $bestDiff = $diff;
  209. $best = $h;
  210. }
  211. }
  212. }
  213. // 所有历史都在评论之后 → 取最早一条
  214. return $best ?? $histories[0];
  215. }
  216. }