|
@@ -114,7 +114,8 @@ class ExportAiTrainingData extends Command
|
|
|
'word_start',
|
|
'word_start',
|
|
|
'word_end',
|
|
'word_end',
|
|
|
'content',
|
|
'content',
|
|
|
- 'content_type'
|
|
|
|
|
|
|
+ 'content_type',
|
|
|
|
|
+ 'updated_at'
|
|
|
])
|
|
])
|
|
|
->whereNotNull('content')
|
|
->whereNotNull('content')
|
|
|
->orderBy('book_id')
|
|
->orderBy('book_id')
|
|
@@ -145,7 +146,7 @@ class ExportAiTrainingData extends Command
|
|
|
'read',
|
|
'read',
|
|
|
'translation',
|
|
'translation',
|
|
|
$sent->content_type,
|
|
$sent->content_type,
|
|
|
- 'text',
|
|
|
|
|
|
|
+ 'html',
|
|
|
);
|
|
);
|
|
|
$translation = trim($translation);
|
|
$translation = trim($translation);
|
|
|
// 忽略空的译文
|
|
// 忽略空的译文
|
|
@@ -155,10 +156,12 @@ class ExportAiTrainingData extends Command
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//忽略过短的译文
|
|
//忽略过短的译文
|
|
|
|
|
+ /*
|
|
|
if (mb_strlen($translation) / mb_strlen($origin) < $this->ShortTrans) {
|
|
if (mb_strlen($translation) / mb_strlen($origin) < $this->ShortTrans) {
|
|
|
Log::warning('translation is short id=' . $id);
|
|
Log::warning('translation is short id=' . $id);
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
|
|
+ */
|
|
|
//原文与翻译完全相同
|
|
//原文与翻译完全相同
|
|
|
if ($translation === $origin) {
|
|
if ($translation === $origin) {
|
|
|
Log::warning('translation is same id=' . $id);
|
|
Log::warning('translation is same id=' . $id);
|
|
@@ -174,6 +177,7 @@ class ExportAiTrainingData extends Command
|
|
|
'translation' => $translation,
|
|
'translation' => $translation,
|
|
|
'category' => $tags,
|
|
'category' => $tags,
|
|
|
'path' => $path,
|
|
'path' => $path,
|
|
|
|
|
+ 'updated_at' => $sent->updated_at
|
|
|
];
|
|
];
|
|
|
|
|
|
|
|
fwrite($fp, json_encode($currData, JSON_UNESCAPED_UNICODE) . "\n");
|
|
fwrite($fp, json_encode($currData, JSON_UNESCAPED_UNICODE) . "\n");
|