ExportDiscussion.php 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\DB;
  5. use App\Http\Api\ChannelApi;
  6. use Carbon\Carbon;
  7. class ExportDiscussion extends Command
  8. {
  9. /**
  10. * The name and signature of the console command.
  11. */
  12. protected $signature = 'export:discussion {editor : The editor UID to export discussions for}';
  13. /**
  14. * The console command description.
  15. */
  16. protected $description = 'Export discussions made by a specific editor to a Markdown file';
  17. /** @var string 巴利原文 channel_uid */
  18. private string $orgChannelId;
  19. /** @var resource 输出文件句柄(流式写入,避免大字符串堆积在内存) */
  20. private $fileHandle;
  21. /** @var string 输出文件路径 */
  22. private string $outputPath;
  23. /**
  24. * Execute the console command.
  25. */
  26. public function handle(): int
  27. {
  28. $editorUid = $this->argument('editor');
  29. $this->info("Fetching discussions for editor: {$editorUid}");
  30. // 1. 获取巴利原文 channel_uid
  31. $this->orgChannelId = ChannelApi::getSysChannel('_System_Pali_VRI_');
  32. if (!$this->orgChannelId) {
  33. $this->error('Failed to retrieve Pali source channel ID.');
  34. return self::FAILURE;
  35. }
  36. $this->info("Pali channel ID: {$this->orgChannelId}");
  37. // 2. 统计总数(用于进度条)
  38. $total = DB::table('discussions')
  39. ->where('editor_uid', $editorUid)
  40. ->where('status', 'active')
  41. ->count();
  42. if ($total === 0) {
  43. $this->warn("No discussions found for editor: {$editorUid}");
  44. return self::SUCCESS;
  45. }
  46. $this->info("Found {$total} discussion(s). Processing...");
  47. // 3. 打开文件句柄(流式写入,不在内存中拼接整个 Markdown)
  48. $filename = "discussion_export_{$editorUid}_" . now()->format('YmdHis') . '.md';
  49. $this->outputPath = storage_path("app/tmp/{$filename}");
  50. $this->fileHandle = fopen($this->outputPath, 'w');
  51. if (!$this->fileHandle) {
  52. $this->error("Cannot open file for writing: {$this->outputPath}");
  53. return self::FAILURE;
  54. }
  55. // 写文件头
  56. $this->writeLine("# 讨论导出报告\n");
  57. $this->writeLine("- **Editor UID**: {$editorUid}");
  58. $this->writeLine("- **导出时间**: " . now()->toDateTimeString());
  59. $this->writeLine("\n---\n");
  60. // 4. 分批处理(每批 50 条),避免内存溢出
  61. $progressBar = $this->output->createProgressBar($total);
  62. $progressBar->start();
  63. DB::table('discussions')
  64. ->where('editor_uid', $editorUid)
  65. ->where('status', 'active')
  66. ->orderBy('created_at', 'asc')
  67. ->select(['id', 'res_id', 'res_type', 'content', 'created_at'])
  68. ->chunk(50, function ($discussions) use ($progressBar) {
  69. $this->processChunk($discussions);
  70. $progressBar->advance($discussions->count());
  71. // 每批处理完后主动释放内存
  72. gc_collect_cycles();
  73. });
  74. $progressBar->finish();
  75. $this->newLine();
  76. fclose($this->fileHandle);
  77. $this->info("\n✅ 导出完成!文件已保存到: {$this->outputPath}");
  78. return self::SUCCESS;
  79. }
  80. /**
  81. * 处理一批 discussions。
  82. */
  83. private function processChunk(\Illuminate\Support\Collection $discussions): void
  84. {
  85. // --- 批量查译文 sentences ---
  86. $resIds = $discussions->pluck('res_id')->unique()->values()->all();
  87. $translationMap = DB::table('sentences')
  88. ->whereIn('uid', $resIds)
  89. ->select([
  90. 'uid',
  91. 'book_id',
  92. 'paragraph',
  93. 'word_start',
  94. 'word_end',
  95. 'content',
  96. 'channel_uid',
  97. ])
  98. ->get()
  99. ->keyBy('uid');
  100. // --- 批量查 sent_histories(分小批,避免超大 IN) ---
  101. $historiesMap = [];
  102. foreach (array_chunk($resIds, 100) as $batch) {
  103. DB::table('sent_histories')
  104. ->whereIn('sent_uid', $batch)
  105. ->orderBy('create_time', 'asc')
  106. ->select(['sent_uid', 'content', 'create_time'])
  107. ->each(function ($row) use (&$historiesMap) {
  108. $historiesMap[$row->sent_uid][] = $row;
  109. });
  110. }
  111. // --- 收集本批所有唯一坐标,批量查巴利原文 ---
  112. $coordKeys = [];
  113. foreach ($translationMap as $t) {
  114. $key = "{$t->book_id}_{$t->paragraph}_{$t->word_start}_{$t->word_end}";
  115. $coordKeys[$key] = $t;
  116. }
  117. $paliMap = $this->fetchPaliSentences($coordKeys);
  118. // --- 写 Markdown ---
  119. foreach ($discussions as $discussion) {
  120. $sentUid = $discussion->res_id;
  121. $translation = $translationMap->get($sentUid);
  122. if (!$translation) {
  123. continue;
  124. }
  125. $coordKey = "{$translation->book_id}_{$translation->paragraph}_{$translation->word_start}_{$translation->word_end}";
  126. $pali = $paliMap[$coordKey] ?? null;
  127. $paliContent = $pali ? trim($pali->content ?? '(无原文)') : '(未找到巴利原文)';
  128. $discussionCreatedAt = $discussion->created_at
  129. ? Carbon::parse($discussion->created_at)
  130. : null;
  131. $histories = $historiesMap[$sentUid] ?? [];
  132. $matchedHistory = $this->findClosestHistory($histories, $discussionCreatedAt);
  133. $translationAtTime = $matchedHistory
  134. ? trim($matchedHistory->content)
  135. : trim($translation->content ?? '(无译文内容)');
  136. $this->writeLine("# {$paliContent}\n");
  137. $this->writeLine(" - **译文**: {$translationAtTime}");
  138. $this->writeLine(" - **评论**: " . trim($discussion->title ?? '') . trim($discussion->content ?? ''));
  139. $this->writeLine('');
  140. }
  141. // 显式释放本批数据
  142. unset($translationMap, $historiesMap, $coordKeys, $paliMap);
  143. }
  144. /**
  145. * 批量查询巴利原文,每组最多 30 个坐标,避免超大 SQL。
  146. *
  147. * @param array<string, object> $coordKeys key="{book_id}_{paragraph}_{word_start}_{word_end}"
  148. * @return array<string, object>
  149. */
  150. private function fetchPaliSentences(array $coordKeys): array
  151. {
  152. $paliMap = [];
  153. foreach (array_chunk(array_values($coordKeys), 30) as $group) {
  154. $results = DB::table('sentences')
  155. ->where('channel_uid', $this->orgChannelId)
  156. ->where(function ($q) use ($group) {
  157. foreach ($group as $t) {
  158. $q->orWhere(function ($sub) use ($t) {
  159. $sub->where('book_id', $t->book_id)
  160. ->where('paragraph', $t->paragraph)
  161. ->where('word_start', $t->word_start)
  162. ->where('word_end', $t->word_end);
  163. });
  164. }
  165. })
  166. ->select(['book_id', 'paragraph', 'word_start', 'word_end', 'content'])
  167. ->get();
  168. foreach ($results as $ps) {
  169. $key = "{$ps->book_id}_{$ps->paragraph}_{$ps->word_start}_{$ps->word_end}";
  170. $paliMap[$key] = $ps;
  171. }
  172. unset($results);
  173. }
  174. return $paliMap;
  175. }
  176. /**
  177. * 流式写入一行到文件。
  178. */
  179. private function writeLine(string $line): void
  180. {
  181. fwrite($this->fileHandle, $line . "\n");
  182. }
  183. /**
  184. * 在历史记录中找评论发布时间之前最近的那条。
  185. * 若全部在评论之后,则退而取最早一条。
  186. *
  187. * @param array $histories sent_histories(已按 create_time ASC 排序)
  188. * @param Carbon|null $discussionCreatedAt 评论发布时间
  189. */
  190. private function findClosestHistory(array $histories, ?Carbon $discussionCreatedAt): ?object
  191. {
  192. if (empty($histories)) {
  193. return null;
  194. }
  195. if (!$discussionCreatedAt) {
  196. return end($histories) ?: null;
  197. }
  198. $discussionTimestamp = $discussionCreatedAt->timestamp;
  199. $best = null;
  200. $bestDiff = PHP_INT_MAX;
  201. foreach ($histories as $h) {
  202. $historyTime = (int) $h->create_time;
  203. if ($historyTime <= $discussionTimestamp) {
  204. $diff = $discussionTimestamp - $historyTime;
  205. if ($diff < $bestDiff) {
  206. $bestDiff = $diff;
  207. $best = $h;
  208. }
  209. }
  210. }
  211. // 所有历史都在评论之后 → 取最早一条
  212. return $best ?? $histories[0];
  213. }
  214. }