improve evaluation

This commit is contained in:
Julian Freeman
2026-02-25 00:00:07 -04:00
parent 6ff6fd3f25
commit f21366e55f
2 changed files with 100 additions and 12 deletions

View File

@@ -90,20 +90,23 @@ const sourceDropdownOpen = ref(false);
const targetDropdownOpen = ref(false);
const speakerDropdownOpen = ref(false);
const toneDropdownOpen = ref(false);
const evaluationProfileDropdownOpen = ref(false);
/**
 * Collapses every custom dropdown in this component by resetting each
 * open-state ref (source, target, speaker, tone, evaluation profile) to false.
 */
const closeAllDropdowns = () => {
  const openStates = [
    sourceDropdownOpen,
    targetDropdownOpen,
    speakerDropdownOpen,
    toneDropdownOpen,
    evaluationProfileDropdownOpen,
  ];
  for (const openState of openStates) {
    openState.value = false;
  }
};
const toggleDropdown = (type: 'source' | 'target' | 'speaker' | 'tone') => {
const toggleDropdown = (type: 'source' | 'target' | 'speaker' | 'tone' | 'evaluationProfile') => {
const states = {
source: sourceDropdownOpen,
target: targetDropdownOpen,
speaker: speakerDropdownOpen,
tone: toneDropdownOpen
tone: toneDropdownOpen,
evaluationProfile: evaluationProfileDropdownOpen
};
const targetState = states[type];
@@ -186,6 +189,12 @@ const currentToneLabel = computed(() => {
return TONE_REGISTER_OPTIONS.find(opt => opt.value === settings.toneRegister)?.label || '正式专业';
});
/**
 * Text shown on the evaluation-profile dropdown button.
 *
 * Returns the default label when no evaluation profile id is set, or when no
 * entry in `settings.profiles` matches the stored id. Otherwise returns the
 * profile name followed by its model name in parentheses.
 */
const currentEvaluationProfileLabel = computed(() => {
  const defaultLabel = '使用主翻译配置(默认)';
  if (!settings.evaluationProfileId) return defaultLabel;

  const profile = settings.profiles.find(p => p.id === settings.evaluationProfileId);
  if (!profile) return defaultLabel;

  // Keep the name and model visually separate; previously they were concatenated
  // with nothing in between (e.g. "My Profilegpt-4").
  return profile.modelName ? `${profile.name} (${profile.modelName})` : profile.name;
});
const swapLanguages = () => {
const temp = { ...settings.sourceLang };
settings.sourceLang = { ...settings.targetLang };
@@ -216,6 +225,20 @@ const evaluateTranslation = async () => {
isEvaluating.value = true;
evaluationResult.value = null;
// Determine which API config to use for evaluation
let apiBaseUrl = settings.apiBaseUrl;
let apiKey = settings.apiKey;
let modelName = settings.modelName;
if (settings.evaluationProfileId) {
const profile = settings.profiles.find(p => p.id === settings.evaluationProfileId);
if (profile) {
apiBaseUrl = profile.apiBaseUrl;
apiKey = profile.apiKey;
modelName = profile.modelName;
}
}
const evaluationPrompt = settings.evaluationPromptTemplate
.replace(/{SOURCE_LANG}/g, sourceLang.value.englishName)
.replace(/{TARGET_LANG}/g, targetLang.value.englishName)
@@ -226,7 +249,7 @@ const evaluateTranslation = async () => {
.replace(/{TRANSLATED_TEXT}/g, targetText.value);
const requestBody = {
model: settings.modelName,
model: modelName,
messages: [
{ role: "system", content: "You are a professional translation auditor. You must respond in valid JSON format." },
{ role: "user", content: evaluationPrompt }
@@ -238,8 +261,8 @@ const evaluateTranslation = async () => {
try {
const response = await invoke<string>('translate', {
apiAddress: settings.apiBaseUrl,
apiKey: settings.apiKey,
apiAddress: apiBaseUrl,
apiKey: apiKey,
payload: requestBody
});
@@ -616,10 +639,10 @@ const translate = async () => {
</p>
</div>
<div v-if="evaluationResult.improvements" class="flex gap-2 p-3 bg-blue-50/50 dark:bg-blue-900/10 rounded-lg border border-blue-100/50 dark:border-blue-900/20">
<div v-if="evaluationResult.improvements && evaluationResult.score < 90" class="flex gap-2 p-3 bg-blue-50/50 dark:bg-blue-900/10 rounded-lg border border-blue-100/50 dark:border-blue-900/20">
<Check class="w-4 h-4 text-blue-500 shrink-0 mt-0.5" />
<div class="space-y-1">
<span class="text-[10px] font-bold text-blue-600/70 dark:text-blue-400/70 uppercase">建议优化</span>
<span class="text-[11px] font-bold text-blue-600/70 dark:text-blue-400/70 uppercase">建议优化</span>
<p class="text-xs text-slate-600 dark:text-slate-300 leading-relaxed">
{{ evaluationResult.improvements }}
</p>
@@ -773,6 +796,69 @@ const translate = async () => {
)"></div>
</button>
</div>
<div v-if="settings.enableEvaluation" class="space-y-3 pt-4 border-t border-dashed dark:border-slate-800 animate-in fade-in slide-in-from-top-2 duration-300">
<div class="flex items-center gap-2 mb-1">
<Settings class="w-3.5 h-3.5 text-blue-500" />
<label class="text-sm font-semibold text-slate-700 dark:text-slate-300">审计模型</label>
</div>
<!-- Custom Evaluation Profile Dropdown -->
<div class="relative lang-dropdown">
<button
@click.stop="toggleDropdown('evaluationProfile')"
class="flex items-center justify-between w-full px-4 py-2.5 border dark:border-slate-700 rounded-xl bg-slate-50/50 dark:bg-slate-800/30 hover:bg-slate-100/50 dark:hover:bg-slate-800/50 transition-all text-sm text-slate-700 dark:text-slate-200 group"
>
<span class="truncate">{{ currentEvaluationProfileLabel }}</span>
<ChevronDown :class="cn('w-4 h-4 text-slate-400 transition-transform duration-200 group-hover:text-blue-500', evaluationProfileDropdownOpen && 'rotate-180')" />
</button>
<transition
enter-active-class="transition duration-100 ease-out"
enter-from-class="transform scale-95 opacity-0"
enter-to-class="transform scale-100 opacity-100"
leave-active-class="transition duration-75 ease-in"
leave-from-class="transform scale-100 opacity-100"
leave-to-class="transform scale-95 opacity-0"
>
<div
v-if="evaluationProfileDropdownOpen"
class="absolute left-0 mt-2 w-full max-h-60 overflow-y-auto bg-white dark:bg-slate-800 rounded-xl shadow-xl border border-slate-200 dark:border-slate-700 z-50 py-2 flex flex-col custom-scrollbar"
>
<button
@click="settings.evaluationProfileId = null; evaluationProfileDropdownOpen = false"
:class="cn(
'px-4 py-2.5 text-sm text-left transition-colors flex items-center justify-between',
settings.evaluationProfileId === null ? 'bg-blue-50 text-blue-600 dark:bg-blue-900/30 dark:text-blue-400 font-bold' : 'text-slate-500 hover:bg-slate-50 dark:hover:bg-slate-700/50'
)"
>
使用主翻译配置(默认)
<Check v-if="settings.evaluationProfileId === null" class="w-3.5 h-3.5" />
</button>
<div class="h-px bg-slate-100 dark:bg-slate-700 my-1 mx-2"></div>
<button
v-for="profile in settings.profiles"
:key="profile.id"
@click="settings.evaluationProfileId = profile.id; evaluationProfileDropdownOpen = false"
:class="cn(
'px-4 py-2.5 text-sm text-left transition-colors flex items-center justify-between',
settings.evaluationProfileId === profile.id ? 'bg-blue-50 text-blue-600 dark:bg-blue-900/30 dark:text-blue-400 font-bold' : 'text-slate-600 dark:text-slate-300 hover:bg-slate-50 dark:hover:bg-slate-700/50'
)"
>
<div class="flex flex-col min-w-0">
<span class="truncate">{{ profile.name }}</span>
<span class="text-[10px] opacity-60 font-mono">{{ profile.modelName }}</span>
</div>
<Check v-if="settings.evaluationProfileId === profile.id" class="w-3.5 h-3.5 shrink-0" />
</button>
</div>
</transition>
</div>
<p class="text-[11px] text-slate-500 dark:text-slate-500 pl-1">
提示:建议为审计选择更强大的模型,以获得更精准的反馈。
</p>
</div>
</div>
</section>
@@ -789,6 +875,9 @@ const translate = async () => {
rows="6"
class="w-full px-4 py-3 border dark:border-slate-700 rounded-lg bg-transparent focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 outline-none transition-all font-mono text-xs leading-relaxed text-slate-900 dark:text-slate-100"
></textarea>
<div class="flex flex-wrap gap-2 mt-2">
<span v-for="tag in ['{SOURCE_CODE}', '{TARGET_CODE}', '{SOURCE_LANG}', '{TARGET_LANG}', '{SPEAKER_IDENTITY}', '{TONE_REGISTER}']" :key="tag" class="px-2 py-1 bg-slate-100 dark:bg-slate-800 text-[10px] font-mono rounded border dark:border-slate-700 text-slate-600 dark:text-slate-400">{{ tag }}</span>
</div>
</div>
<div v-if="settings.enableEvaluation" class="space-y-2 border-t dark:border-slate-800 pt-6">
@@ -802,13 +891,10 @@ const translate = async () => {
class="w-full px-4 py-3 border dark:border-slate-700 rounded-lg bg-transparent focus:ring-2 focus:ring-blue-500/20 focus:border-blue-500 outline-none transition-all font-mono text-xs leading-relaxed text-slate-900 dark:text-slate-100"
></textarea>
<div class="flex flex-wrap gap-2 mt-2">
<span v-for="tag in ['{SOURCE_TEXT}', '{TRANSLATED_TEXT}', '{CONTEXT}']" :key="tag" class="px-2 py-1 bg-slate-100 dark:bg-slate-800 text-[10px] font-mono rounded border dark:border-slate-700 text-slate-600 dark:text-slate-400">{{ tag }}</span>
<span v-for="tag in ['{SOURCE_LANG}', '{TARGET_LANG}', '{SPEAKER_IDENTITY}', '{TONE_REGISTER}', '{CONTEXT}', '{SOURCE_TEXT}', '{TRANSLATED_TEXT}']" :key="tag" class="px-2 py-1 bg-slate-100 dark:bg-slate-800 text-[10px] font-mono rounded border dark:border-slate-700 text-slate-600 dark:text-slate-400">{{ tag }}</span>
</div>
</div>
<div class="flex flex-wrap gap-2 mt-2">
<span v-for="tag in ['{SOURCE_LANG}', '{TARGET_LANG}', '{SPEAKER_IDENTITY}', '{TONE_REGISTER}']" :key="tag" class="px-2 py-1 bg-slate-100 dark:bg-slate-800 text-[10px] font-mono rounded border dark:border-slate-700 text-slate-600 dark:text-slate-400">{{ tag }}</span>
</div>
</div>
</section>
</div>

View File

@@ -67,7 +67,7 @@ Your task is to critically evaluate the accuracy and quality of a translation.
[Instructions]
1. Compare the [Source Text] and [Translated Text] meticulously.
2. Check if the translation respects the [Context Info] and [Speaker Identity].
2. Check if the translation respects the [Context Info].
3. Assign an "Accuracy Score" from 0 to 100.
- Give 0 if there are fatal semantic errors, complete hallucinations, or if the meaning is reversed.
- Deduct points for minor inaccuracies, unnatural phrasing, or tone mismatches.
@@ -101,6 +101,7 @@ export const useSettingsStore = defineStore('settings', () => {
const enableEvaluation = useLocalStorage('enable-evaluation', true);
const evaluationPromptTemplate = useLocalStorage('evaluation-prompt-template', DEFAULT_EVALUATION_TEMPLATE);
const evaluationProfileId = useLocalStorage<string | null>('evaluation-profile-id', null);
// 存储整个对象以保持一致性
const sourceLang = useLocalStorage<Language>('source-lang-v2', LANGUAGES[0]);
@@ -133,6 +134,7 @@ export const useSettingsStore = defineStore('settings', () => {
systemPromptTemplate,
enableEvaluation,
evaluationPromptTemplate,
evaluationProfileId,
sourceLang,
targetLang,
speakerIdentity,