ExportAiTrainingData.php 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Log;
  5. use App\Models\Sentence;
  6. use App\Models\PaliSentence;
  7. use Illuminate\Support\Str;
  8. use App\Http\Api\MdRender;
  /**
   * Artisan command that exports sentence pairs as a TSV file and then hands
   * the file to the `export:zip` command for compression.
   *
   * Each output row has two tab-separated columns: the text of the matching
   * PaliSentence row, and the rendered content of a Sentence row from one of
   * the configured channels.
   */
  class ExportAiTrainingData extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'export:ai.training.data {--format=gz : zip file format 7z,lzma,gz }';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'export ai training data';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * Writes `wikipali-offline-ai-training-<Y-m-d>.tsv` under
       * `storage/app/tmp/export/offline`, then calls `export:zip` on it.
       *
       * @return int 1 when the export directory or file cannot be created,
       *             otherwise the exit code reported by `export:zip`.
       *
       * @throws \RuntimeException when a row cannot be written to the export file.
       */
      public function handle()
      {
          Log::debug('task export offline sentence-table start');

          // Create the export directory if it does not exist yet.
          $exportDir = storage_path('app/tmp/export/offline');
          if (!is_dir($exportDir)) {
              // Re-check is_dir() after a failed mkdir(): another process may
              // have created the directory between the two calls.
              if (!mkdir($exportDir, 0755, true) && !is_dir($exportDir)) {
                  $this->error('mkdir fail path=' . $exportDir);
                  return 1;
              }
              $this->info('make dir successful ' . $exportDir);
          }

          $filename = 'wikipali-offline-ai-training-' . date("Y-m-d") . '.tsv';
          $exportFile = $exportDir . '/' . $filename;

          $fp = fopen($exportFile, 'w');
          if ($fp === false) {
              // Report through the console and return a failure code instead of
              // die(), consistent with the mkdir failure path above.
              $this->error('无法创建文件');
              return 1;
          }

          $channels = [
              '19f53a65-81db-4b7d-8144-ac33f1217d34',
          ];

          $start = time();
          try {
              foreach ($channels as $channel) {
                  $this->exportChannel($fp, $channel);
              }
          } finally {
              // Always release the handle, even when rendering or a query throws.
              fclose($fp);
          }
          $this->info((time() - $start) . ' seconds');

          // Propagate the exit code of the compression step so that a failed
          // archive is not reported as a successful export.
          return $this->call('export:zip', [
              'id' => 'ai-translating-training-data',
              'filename' => $exportFile,
              'title' => 'wikipali ai translating training data',
              'format' => $this->option('format'),
          ]);
      }

      /**
       * Write one TSV row per Sentence of the given channel.
       *
       * @param resource $fp      Writable handle of the export file.
       * @param string   $channel Value matched against Sentence.channel_uid.
       *
       * @return void
       *
       * @throws \RuntimeException when fwrite() reports a failure.
       */
      private function exportChannel($fp, string $channel): void
      {
          $query = Sentence::where('channel_uid', $channel);
          $bar = $this->output->createProgressBar($query->count());

          // cursor() iterates the result set lazily instead of loading it at once.
          $sentences = $query->select([
              'book_id',
              'paragraph',
              'word_start',
              'word_end',
              'content',
              'content_type',
          ])->cursor();

          foreach ($sentences as $sent) {
              // NOTE(review): the meaning of the positional arguments is defined
              // by MdRender::render, which is not visible here — presumably the
              // last one selects plain-text output; confirm against MdRender.
              $content = MdRender::render(
                  $sent->content,
                  [$channel],
                  null,
                  'read',
                  'translation',
                  $sent->content_type,
                  'text',
              );

              // One lookup per sentence; value() yields null when no row matches.
              $origin = PaliSentence::where('book', $sent->book_id)
                  ->where('paragraph', $sent->paragraph)
                  ->where('word_begin', $sent->word_start)
                  ->where('word_end', $sent->word_end)
                  ->value('text');

              $row = [
                  $this->tsvField($origin),
                  $this->tsvField($content),
              ];
              if (fwrite($fp, implode("\t", $row) . "\n") === false) {
                  throw new \RuntimeException('failed to write export row');
              }
              $bar->advance();
          }

          // Complete the bar and move to a fresh line so later output is not
          // appended to the progress display.
          $bar->finish();
          $this->output->newLine();
      }

      /**
       * Make a value safe to use as a single TSV field.
       *
       * Null becomes an empty string, line breaks are removed and tabs are
       * replaced by a space so the value cannot introduce extra rows or columns.
       *
       * @param mixed $value Value to clean; cast to string.
       *
       * @return string
       */
      private function tsvField($value): string
      {
          return str_replace(["\t", "\r", "\n"], [' ', '', ''], (string) $value);
      }
  }