support evaluation

This commit is contained in:
Julian Freeman
2026-02-24 23:27:06 -04:00
parent f6bc4152b0
commit 6ff6fd3f25
2 changed files with 182 additions and 4 deletions

View File

@@ -25,6 +25,7 @@ import {
useSettingsStore,
LANGUAGES,
DEFAULT_TEMPLATE,
DEFAULT_EVALUATION_TEMPLATE,
SPEAKER_IDENTITY_OPTIONS,
TONE_REGISTER_OPTIONS,
type ApiProfile
@@ -134,6 +135,15 @@ const targetText = ref('');
const isTranslating = ref(false);
const showCopyFeedback = ref(false);
/** Structured verdict produced by the translation-quality evaluation request. */
interface EvaluationResult {
// Numeric rating; the template colour-codes it at the 80 and 60 thresholds and renders it as "N / 100".
score: number;
// Explanation text displayed under the score.
analysis: string;
// Optional suggestions; the template renders this section only when the field is truthy.
improvements?: string;
}
// Latest evaluation outcome, or null when none is available. It is reset to null when the
// source is cleared and whenever a new translation or evaluation starts.
const evaluationResult = ref<EvaluationResult | null>(null);
// True while an evaluation request is in flight; drives the "auditing" indicator in the template.
const isEvaluating = ref(false);
let unlisten: (() => void) | null = null;
onMounted(async () => {
@@ -185,6 +195,7 @@ const swapLanguages = () => {
/**
 * Resets the translation workspace: empties the source and target text and
 * discards any evaluation result that belonged to the previous translation.
 */
const clearSource = () => {
  for (const textField of [sourceText, targetText]) {
    textField.value = '';
  }
  evaluationResult.value = null;
};
const copyTarget = async () => {
@@ -199,11 +210,61 @@ const copyTarget = async () => {
}
};
/**
 * Fills `{KEY}` placeholders in `template` in a single pass.
 *
 * A replacer function is used instead of a replacement string so that `$&`,
 * `$1`, `$$` etc. inside user-supplied text are inserted literally, and the
 * single pass guarantees that placeholder-looking text coming from one value
 * (e.g. a source text containing "{TRANSLATED_TEXT}") is never expanded again.
 * Placeholders without an entry in `values` are left untouched.
 */
const fillEvaluationTemplate = (template: string, values: ReadonlyMap<string, string>): string =>
  template.replace(/\{([A-Z_]+)\}/g, (placeholder: string, key: string) => values.get(key) ?? placeholder);

/**
 * Extracts and validates the evaluation verdict from a raw model reply.
 *
 * The reply may be wrapped in a Markdown code fence or surrounded by prose, so
 * the outermost `{ ... }` span is parsed rather than the whole string.
 *
 * @throws Error when no JSON object is present, the JSON is malformed, or the
 *         object carries no usable numeric `score`.
 */
const parseEvaluationResponse = (raw: string): EvaluationResult => {
  const start = raw.indexOf('{');
  const end = raw.lastIndexOf('}');
  if (start === -1 || end <= start) {
    throw new Error('No JSON object found in evaluation response');
  }

  const parsed: unknown = JSON.parse(raw.slice(start, end + 1));
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error('Evaluation response is not a JSON object');
  }
  const { score, analysis, improvements } = parsed as Record<string, unknown>;

  // Models sometimes quote the number; accept numeric strings but nothing else.
  let numericScore = Number.NaN;
  if (typeof score === 'number') {
    numericScore = score;
  } else if (typeof score === 'string' && score.trim() !== '') {
    numericScore = Number(score);
  }
  if (!Number.isFinite(numericScore)) {
    throw new Error('Evaluation response has no numeric "score"');
  }

  const result: EvaluationResult = {
    // The UI presents the score as "N / 100", so keep it inside that range.
    score: Math.min(100, Math.max(0, numericScore)),
    analysis: typeof analysis === 'string' ? analysis : '',
  };
  if (typeof improvements === 'string' && improvements.trim() !== '') {
    result.improvements = improvements;
  }
  return result;
};

/**
 * Asks the configured model to audit the current translation and stores the
 * verdict in `evaluationResult`.
 *
 * Does nothing when evaluation is disabled or there is no translated text.
 * Request, response and failures are recorded through `settings.addLog`; on
 * any failure `evaluationResult` stays `null`. `isEvaluating` is true for the
 * duration of the request and is always reset afterwards.
 */
const evaluateTranslation = async (): Promise<void> => {
  if (!settings.enableEvaluation || !targetText.value) return;

  isEvaluating.value = true;
  evaluationResult.value = null;

  const evaluationPrompt = fillEvaluationTemplate(
    settings.evaluationPromptTemplate,
    new Map<string, string>([
      ['SOURCE_LANG', sourceLang.value.englishName],
      ['TARGET_LANG', targetLang.value.englishName],
      ['SPEAKER_IDENTITY', settings.speakerIdentity],
      ['TONE_REGISTER', settings.toneRegister],
      // Empty context is deliberately rendered as the word "None".
      ['CONTEXT', context.value || 'None'],
      ['SOURCE_TEXT', sourceText.value],
      ['TRANSLATED_TEXT', targetText.value],
    ])
  );

  const requestBody = {
    model: settings.modelName,
    messages: [
      { role: "system", content: "You are a professional translation auditor. You must respond in valid JSON format." },
      { role: "user", content: evaluationPrompt }
    ],
    stream: false // Non-streaming for evaluation to parse JSON
  };

  settings.addLog('request', { type: 'evaluation', ...requestBody });

  try {
    const response = await invoke<string>('translate', {
      apiAddress: settings.apiBaseUrl,
      apiKey: settings.apiKey,
      payload: requestBody
    });

    try {
      evaluationResult.value = parseEvaluationResponse(response);
      settings.addLog('response', { type: 'evaluation', content: evaluationResult.value });
    } catch (parseErr: unknown) {
      // A malformed verdict is not fatal: keep the translation, drop the audit.
      console.error('Failed to parse evaluation result:', response);
      settings.addLog('error', `Evaluation parsing error: ${response}`);
    }
  } catch (err: unknown) {
    settings.addLog('error', `Evaluation error: ${String(err)}`);
  } finally {
    isEvaluating.value = false;
  }
};
const translate = async () => {
if (!sourceText.value.trim() || isTranslating.value) return;
isTranslating.value = true;
targetText.value = '';
evaluationResult.value = null;
const systemMessage = settings.systemPromptTemplate
.replace(/{SOURCE_LANG}/g, sourceLang.value.englishName)
@@ -240,6 +301,11 @@ const translate = async () => {
targetText.value = response;
}
settings.addLog('response', 'Translation completed');
// Trigger evaluation if enabled
if (settings.enableEvaluation) {
await evaluateTranslation();
}
} catch (err: any) {
const errorMsg = String(err);
settings.addLog('error', errorMsg);
@@ -520,6 +586,47 @@ const translate = async () => {
{{ targetText }}
</template>
<span v-else class="text-slate-300 dark:text-slate-600 italic">翻译结果将在此显示...</span>
<!-- Evaluation Results -->
<div v-if="isEvaluating || evaluationResult" class="mt-8 pt-6 border-t dark:border-slate-800 space-y-4 animate-in fade-in slide-in-from-bottom-2 duration-500">
<div class="flex items-center justify-between">
<div class="flex items-center gap-2">
<div :class="cn(
'w-2 h-2 rounded-full',
isEvaluating ? 'bg-blue-400 animate-pulse' : (evaluationResult?.score && evaluationResult.score >= 80 ? 'bg-green-500' : evaluationResult?.score && evaluationResult.score >= 60 ? 'bg-amber-500' : 'bg-red-500')
)"></div>
<h3 class="text-xs font-bold text-slate-400 uppercase tracking-widest">翻译质量审计</h3>
</div>
<div v-if="evaluationResult" :class="cn(
'text-lg font-black font-mono',
evaluationResult.score >= 80 ? 'text-green-600' : evaluationResult.score >= 60 ? 'text-amber-600' : 'text-red-600'
)">
{{ evaluationResult.score }} <span class="text-[10px] font-normal opacity-50">/ 100</span>
</div>
<div v-else-if="isEvaluating" class="flex items-center gap-1.5 text-xs text-blue-500 font-medium">
<Loader2 class="w-3 h-3 animate-spin" />
正在审计...
</div>
</div>
<div v-if="evaluationResult" class="space-y-3">
<div class="bg-slate-50 dark:bg-slate-800/40 p-3 rounded-lg border border-slate-100 dark:border-slate-800/60">
<p class="text-xs text-slate-600 dark:text-slate-300 leading-relaxed">
{{ evaluationResult.analysis }}
</p>
</div>
<div v-if="evaluationResult.improvements" class="flex gap-2 p-3 bg-blue-50/50 dark:bg-blue-900/10 rounded-lg border border-blue-100/50 dark:border-blue-900/20">
<Check class="w-4 h-4 text-blue-500 shrink-0 mt-0.5" />
<div class="space-y-1">
<span class="text-[10px] font-bold text-blue-600/70 dark:text-blue-400/70 uppercase">建议优化</span>
<p class="text-xs text-slate-600 dark:text-slate-300 leading-relaxed">
{{ evaluationResult.improvements }}
</p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
@@ -647,26 +754,61 @@ const translate = async () => {
)"></div>
</button>
</div>
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-slate-700 dark:text-slate-300">自动质量审计</label>
<p class="text-xs text-slate-500 dark:text-slate-500">翻译完成后自动评估准确度</p>
</div>
<button
@click="settings.enableEvaluation = !settings.enableEvaluation"
:class="cn(
'w-12 h-6 rounded-full transition-colors relative',
settings.enableEvaluation ? 'bg-blue-600' : 'bg-slate-300 dark:bg-slate-700'
)"
>
<div :class="cn(
'absolute top-1 left-1 w-4 h-4 bg-white rounded-full transition-transform',
settings.enableEvaluation ? 'translate-x-6' : 'translate-x-0'
)"></div>
</button>
</div>
</div>
</section>
<section>
<h2 class="text-sm font-semibold text-slate-500 dark:text-slate-400 uppercase tracking-wider mb-4">提示词工程</h2>
<div class="bg-slate-200/20 dark:bg-slate-900 rounded-xl shadow-sm/5 border dark:border-slate-800 p-6">
<div class="bg-slate-200/20 dark:bg-slate-900 rounded-xl shadow-sm/5 border dark:border-slate-800 p-6 space-y-6">
<div class="space-y-2">
<div class="flex items-center justify-between">
<label class="text-sm font-medium text-slate-700 dark:text-slate-300">系统提示词模板</label>
<label class="text-sm font-medium text-slate-700 dark:text-slate-300">系统提示词模板 (翻译)</label>
<button @click="settings.systemPromptTemplate = DEFAULT_TEMPLATE" class="text-xs text-blue-600 dark:text-blue-400 hover:underline">恢复默认值</button>
</div>
<textarea
v-model="settings.systemPromptTemplate"
rows="9"
rows="6"
class="w-full px-4 py-3 border dark:border-slate-700 rounded-lg bg-transparent focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 outline-none transition-all font-mono text-xs leading-relaxed text-slate-900 dark:text-slate-100"
></textarea>
</div>
<div v-if="settings.enableEvaluation" class="space-y-2 border-t dark:border-slate-800 pt-6">
<div class="flex items-center justify-between">
<label class="text-sm font-medium text-slate-700 dark:text-slate-300">审计提示词模板 (评估)</label>
<button @click="settings.evaluationPromptTemplate = DEFAULT_EVALUATION_TEMPLATE" class="text-xs text-blue-600 dark:text-blue-400 hover:underline">恢复默认值</button>
</div>
<textarea
v-model="settings.evaluationPromptTemplate"
rows="8"
class="w-full px-4 py-3 border dark:border-slate-700 rounded-lg bg-transparent focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 outline-none transition-all font-mono text-xs leading-relaxed text-slate-900 dark:text-slate-100"
></textarea>
<div class="flex flex-wrap gap-2 mt-2">
<span v-for="tag in ['{SOURCE_LANG}', '{SOURCE_CODE}', '{TARGET_LANG}', '{TARGET_CODE}', '{SPEAKER_IDENTITY}', '{TONE_REGISTER}']" :key="tag" class="px-2 py-1 bg-slate-100 dark:bg-slate-800 text-[10px] font-mono rounded border dark:border-slate-700 text-slate-600 dark:text-slate-400">{{ tag }}</span>
<span v-for="tag in ['{SOURCE_TEXT}', '{TRANSLATED_TEXT}', '{CONTEXT}']" :key="tag" class="px-2 py-1 bg-slate-100 dark:bg-slate-800 text-[10px] font-mono rounded border dark:border-slate-700 text-slate-600 dark:text-slate-400">{{ tag }}</span>
</div>
</div>
<div class="flex flex-wrap gap-2 mt-2">
<span v-for="tag in ['{SOURCE_LANG}', '{TARGET_LANG}', '{SPEAKER_IDENTITY}', '{TONE_REGISTER}']" :key="tag" class="px-2 py-1 bg-slate-100 dark:bg-slate-800 text-[10px] font-mono rounded border dark:border-slate-700 text-slate-600 dark:text-slate-400">{{ tag }}</span>
</div>
</div>
</section>
</div>

View File

@@ -51,6 +51,37 @@ export const DEFAULT_TEMPLATE = `You are a professional {SOURCE_LANG} ({SOURCE_C
3. Produce ONLY the {TARGET_LANG} translation, without any additional explanations, notes, or commentary.
4. If [Context] is provided, use it strictly to disambiguate polysemous words. DO NOT add any factual information or descriptive details from the [Context] that are not present in the [Text to Translate].`;
/**
 * Default prompt for the translation-quality evaluation request.
 *
 * Used as the initial value of the `evaluation-prompt-template` setting. The
 * brace-delimited tokens ({SOURCE_LANG}, {TARGET_LANG}, {SPEAKER_IDENTITY},
 * {TONE_REGISTER}, {CONTEXT}, {SOURCE_TEXT}, {TRANSLATED_TEXT}) are
 * placeholders meant to be substituted before the prompt is sent. The prompt
 * instructs the model to answer with a JSON object holding `score`,
 * `analysis` and `improvements`.
 */
export const DEFAULT_EVALUATION_TEMPLATE = `You are an expert translation auditor proficient in {SOURCE_LANG} and {TARGET_LANG}.
Your task is to critically evaluate the accuracy and quality of a translation.
[Context Info]
- Source Language: {SOURCE_LANG}
- Target Language: {TARGET_LANG}
- Speaker Identity: {SPEAKER_IDENTITY}
- Intended Tone/Register: {TONE_REGISTER}
- Context: {CONTEXT}
[Input]
- Source Text: {SOURCE_TEXT}
- Translated Text: {TRANSLATED_TEXT}
[Instructions]
1. Compare the [Source Text] and [Translated Text] meticulously.
2. Check if the translation respects the [Context Info] and [Speaker Identity].
3. Assign an "Accuracy Score" from 0 to 100.
- Give 0 if there are fatal semantic errors, complete hallucinations, or if the meaning is reversed.
- Deduct points for minor inaccuracies, unnatural phrasing, or tone mismatches.
4. Provide a concise "Analysis" of why you gave that score.
5. (Optional) Provide "Improvements" for a more accurate/natural translation.
[Output Format]
You MUST respond in JSON format with the following keys. The values for "analysis" and "improvements" MUST be written in Simplified Chinese (简体中文), except when quoting the source or target text:
{
"score": number,
"analysis": "string",
"improvements": "string"
}`;
export interface ApiProfile {
id: string;
name: string;
@@ -68,6 +99,9 @@ export const useSettingsStore = defineStore('settings', () => {
const enableStreaming = useLocalStorage('enable-streaming', true);
const systemPromptTemplate = useLocalStorage('system-prompt-template', DEFAULT_TEMPLATE);
const enableEvaluation = useLocalStorage('enable-evaluation', true);
const evaluationPromptTemplate = useLocalStorage('evaluation-prompt-template', DEFAULT_EVALUATION_TEMPLATE);
// 存储整个对象以保持一致性
const sourceLang = useLocalStorage<Language>('source-lang-v2', LANGUAGES[0]);
const targetLang = useLocalStorage<Language>('target-lang-v2', LANGUAGES[4]);
@@ -97,6 +131,8 @@ export const useSettingsStore = defineStore('settings', () => {
profiles,
enableStreaming,
systemPromptTemplate,
enableEvaluation,
evaluationPromptTemplate,
sourceLang,
targetLang,
speakerIdentity,