ExportAiTrainingData.php 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Log;
  5. use App\Models\Sentence;
  6. use App\Models\PaliSentence;
  7. use Illuminate\Support\Str;
  8. use App\Http\Api\MdRender;
  /**
   * Console command that exports sentence pairs as a TSV file.
   *
   * For every sentence of the configured channels the command renders the
   * sentence content through MdRender, looks up the `text` of the PaliSentence
   * row with the same book / paragraph / word range, and writes one line
   * "<pali text>\t<rendered content>\n" to a dated file under
   * storage "app/tmp/export/offline". The finished file is then handed to the
   * `export:zip` command.
   */
  class ExportAiTrainingData extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'export:ai.training.data {--format=gz : zip file format 7z,lzma,gz }';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'export ai training data';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * @return int 1 when the export file cannot be created, otherwise the
       *             exit code reported by the `export:zip` command
       */
      public function handle()
      {
          Log::debug('task export offline sentence-table start');

          $filename = 'wikipali-offline-ai-training-' . date('Y-m-d') . '.tsv';
          $exportFile = storage_path('app/tmp/export/offline/' . $filename);

          // fopen() does not create missing parent directories, so make sure
          // the target directory exists before opening the file.
          $exportDir = dirname($exportFile);
          if (!is_dir($exportDir) && !mkdir($exportDir, 0775, true) && !is_dir($exportDir)) {
              $this->error('无法创建文件');
              return 1;
          }

          $fp = fopen($exportFile, 'w');
          if ($fp === false) {
              // Report through the command output and a non-zero exit code
              // instead of die(), which would exit with status 0.
              $this->error('无法创建文件');
              return 1;
          }

          $channels = [
              '19f53a65-81db-4b7d-8144-ac33f1217d34',
          ];

          $start = time();
          try {
              foreach ($channels as $channel) {
                  $this->exportChannel($fp, $channel);
              }
          } finally {
              // Always release the handle, even if rendering or a query throws.
              fclose($fp);
          }
          $this->info((time() - $start) . ' seconds');

          // Propagate the packer's exit code so a failed zip is not reported as success.
          return $this->call('export:zip', [
              'id' => 'ai-translating-training-data',
              'filename' => $exportFile,
              'title' => 'wikipali ai translating training data',
              'format' => $this->option('format'),
          ]);
      }

      /**
       * Write one TSV line per sentence of a single channel.
       *
       * @param resource $fp      writable handle of the export file
       * @param string   $channel channel uid whose sentences are exported
       * @return void
       */
      private function exportChannel($fp, $channel)
      {
          $query = Sentence::where('channel_uid', $channel);

          // Count before narrowing the column list; the count sizes the progress bar.
          $bar = $this->output->createProgressBar($query->count());

          $sentences = $query->select([
              'book_id',
              'paragraph',
              'word_start',
              'word_end',
              'content',
              'content_type',
          ])->cursor();

          foreach ($sentences as $sent) {
              $content = MdRender::render(
                  $sent->content,
                  [$channel],
                  null,
                  'read',
                  'translation',
                  $sent->content_type,
                  'text',
              );

              // One lookup per sentence; value() yields null when no row matches.
              $origin = PaliSentence::where('book', $sent->book_id)
                  ->where('paragraph', $sent->paragraph)
                  ->where('word_begin', $sent->word_start)
                  ->where('word_end', $sent->word_end)
                  ->value('text');

              $row = [
                  $this->flattenField($origin),
                  $this->flattenField($content),
              ];
              fwrite($fp, implode("\t", $row) . "\n");
              $bar->advance();
          }

          $bar->finish();
          // Move past the progress bar so later output starts on its own line.
          $this->line('');
      }

      /**
       * Make a value safe to use as a single TSV field.
       *
       * Line breaks are removed and tabs become spaces, so the value can
       * neither start a new record nor add an extra column. Null is treated
       * as an empty string.
       *
       * @param mixed $text value to flatten (string or null expected)
       * @return string
       */
      private function flattenField($text)
      {
          return str_replace(["\r", "\n", "\t"], ['', '', ' '], (string) $text);
      }
  }