ExportAiTrainingData.php 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. <?php
  2. namespace App\Console\Commands;
  3. use Illuminate\Console\Command;
  4. use Illuminate\Support\Facades\Log;
  5. use App\Models\Sentence;
  6. use App\Models\PaliSentence;
  7. use Illuminate\Support\Str;
  8. use App\Http\Api\MdRender;
  /**
   * Artisan command that writes a two-column TSV file (original text, rendered
   * translation) for every sentence of the configured channels and then hands
   * the file to the `export:zip` command for compression.
   */
  class ExportAiTrainingData extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'export:ai.training.data {--format=gz : zip file format 7z,lzma,gz }';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'export ai training data';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * Writes one line per sentence in the form "<origin>\t<translation>\n"
       * to storage/app/public/export/offline/, then calls `export:zip` with the
       * generated file name and the requested --format option.
       *
       * @return int 0 on success, 1 when the export file cannot be created or written
       */
      public function handle()
      {
          Log::debug('task export offline sentence-table start');

          $filename = 'wikipali-offline-ai-training-'.date("Y-m-d").'.tsv';
          $exportFile = storage_path('app/public/export/offline/'.$filename);

          // Make sure the target directory exists; the trailing is_dir() covers
          // the case where another process created it between the two checks.
          $exportDir = dirname($exportFile);
          if (!is_dir($exportDir) && !mkdir($exportDir, 0755, true) && !is_dir($exportDir)) {
              $this->error('无法创建文件');
              return 1;
          }

          $fp = fopen($exportFile, 'w');
          if ($fp === false) {
              // Report through the console and return a failure status instead
              // of die(), which would end the process with exit status 0.
              $this->error('无法创建文件');
              return 1;
          }

          $channels = [
              '19f53a65-81db-4b7d-8144-ac33f1217d34',
          ];

          $start = time();
          try {
              foreach ($channels as $channel) {
                  $query = Sentence::where('channel_uid', $channel);
                  $bar = $this->output->createProgressBar($query->count());

                  // cursor() streams the rows instead of loading them all at once.
                  $sentences = $query->select([
                      'book_id',
                      'paragraph',
                      'word_start',
                      'word_end',
                      'content',
                      'content_type',
                  ])->cursor();

                  foreach ($sentences as $sent) {
                      $content = MdRender::render(
                          $sent->content,
                          [$channel],
                          null,
                          'read',
                          'translation',
                          $sent->content_type,
                          'text',
                      );

                      // One lookup per sentence; yields null when no matching
                      // original sentence exists (the column is then left empty).
                      $origin = PaliSentence::where('book', $sent->book_id)
                          ->where('paragraph', $sent->paragraph)
                          ->where('word_begin', $sent->word_start)
                          ->where('word_end', $sent->word_end)
                          ->value('text');

                      $row = [
                          $this->tsvField($origin),
                          $this->tsvField($content),
                      ];

                      if (fwrite($fp, implode("\t", $row)."\n") === false) {
                          $this->error('failed to write to '.$exportFile);
                          return 1;
                      }
                      $bar->advance();
                  }

                  // Complete the bar and move to a fresh line so later output
                  // is not printed on top of it.
                  $bar->finish();
                  $this->output->newLine();
              }
          } finally {
              // Release the handle even when rendering or a query throws.
              fclose($fp);
          }

          $this->info((time() - $start).' seconds');

          $this->call('export:zip', [
              'filename' => $filename,
              'title' => 'wikipali ai training data',
              'format' => $this->option('format'),
          ]);

          return 0;
      }

      /**
       * Make a value safe to store in a single TSV cell.
       *
       * Carriage returns and line feeds are removed and tabs are replaced by a
       * space, so a value can never introduce an extra row or column. A null
       * value becomes an empty string.
       *
       * @param mixed $value value to write; cast to string
       * @return string
       */
      private function tsvField($value): string
      {
          $text = str_replace(["\r", "\n"], '', (string) $value);

          return str_replace("\t", ' ', $text);
      }
  }