/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (https://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: https://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: https://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.ai;

import org.opencms.ade.configuration.CmsADEConfigData;
import org.opencms.ade.contenteditor.I_CmsContentTranslator;
import org.opencms.ai.CmsTranslationUtil.FoundOrCreatedValue;
import org.opencms.configuration.CmsConfigurationException;
import org.opencms.configuration.CmsParameterConfiguration;
import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.gwt.shared.CmsGwtConstants;
import org.opencms.i18n.CmsLocaleManager;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.util.CmsStringUtil;
import org.opencms.xml.content.CmsXmlContent;
import org.opencms.xml.content.I_CmsXmlContentAugmentation;
import org.opencms.xml.types.CmsXmlHtmlValue;
import org.opencms.xml.types.I_CmsXmlContentValue;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.commons.lang3.LocaleUtils;
import org.apache.commons.logging.Log;

import com.deepl.api.DeepLClient;
import com.deepl.api.DeepLClientOptions;
import com.deepl.api.Language;
import com.deepl.api.TextResult;
import com.deepl.api.TextTranslationOptions;
import org.opencms.ai.*;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

/**
 * Content translator which uses the DeepL API to translate the values of an XML content from one locale to another.
 *
 * <p>The API key is read from the secret store using the key {@link #SECRET_API_KEY}; the translator reports itself
 * as disabled when no such secret is available.
 */
public class CmsDeeplTranslation implements I_CmsContentTranslator {

    /** Key used for looking up the API key in the secret store. */
    public static final String SECRET_API_KEY = "contenteditor.translation.deepl.apiKey";

    /** The logger instance for this class. */
    private static final Log LOG = CmsLog.getLog(CmsDeeplTranslation.class);

    /** Configuration parameter for determining a target locale mapping. */
    protected static final Object PARAM_TARGET_LOCALE_MAPPING = "targetLocaleMapping";

    /**
     * Maximum accumulated UTF-8 size (in bytes) of the texts sent in one translation request.
     *
     * <p>The max request size for the API is 128K, but this also includes JSON punctuation and additional
     * parameters, so we leave a wide safety margin.
     */
    private static final int MAX_BATCH_BYTES = 50000;

    /** Maximum number of texts sent in one translation request (DeepL documents a limit of 50 texts per request). */
    private static final int MAX_BATCH_TEXTS = 50;

    /** Cached available target languages. */
    private volatile List<Language> m_targetLanguages;

    /** Cached available source languages. */
    private volatile List<Language> m_sourceLanguages;

    /** The parameters from the configuration. */
    private CmsParameterConfiguration m_params = new CmsParameterConfiguration();

    /** The parsed target locale mapping. */
    private volatile Map<Locale, Locale> m_targetLocaleMapping;

    /**
     * Tries to find a suitable Language object from a list of DeepL-supported languages that matches a given locale.
     *
     * <p>An exact locale match is preferred; if there is none, the first language with the same language code
     * (ignoring country / variant) is used.
     *
     * @param locale the locale
     * @param languages the languages
     * @return the matching language, or null if the locale is null or no language matches
     */
    public static Language getMatchingLanguage(Locale locale, List<Language> languages) {

        if (locale == null) {
            return null;
        }
        for (Language language : languages) {
            if (Locale.forLanguageTag(language.getCode()).equals(locale)) {
                return language;
            }
        }
        // if not found, fall back to just using the language portion
        String languageCode = locale.getLanguage();
        for (Language language : languages) {
            if (Locale.forLanguageTag(language.getCode()).getLanguage().equals(languageCode)) {
                return language;
            }
        }
        return null;
    }

    /**
     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#addConfigurationParameter(java.lang.String, java.lang.String)
     */
    @Override
    public void addConfigurationParameter(String paramName, String paramValue) {

        m_params.add(paramName, paramValue);
    }

    /**
     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#getConfiguration()
     */
    @Override
    public CmsParameterConfiguration getConfiguration() {

        return m_params;
    }

    /**
     * @see org.opencms.ade.contenteditor.I_CmsContentTranslator#getContentAugmentation()
     */
    @Override
    public I_CmsXmlContentAugmentation getContentAugmentation() {

        return new I_CmsXmlContentAugmentation() {

            @Override
            public void augmentContent(Context context) throws Exception {

                CmsObject cms = context.getCmsObject();
                final Locale wpLocale = OpenCms.getWorkplaceManager().getWorkplaceLocale(cms);
                CmsXmlContent content = context.getContent();
                Locale sourceLocale = context.getLocale();
                String targetLocaleParam = context.getParameter(CmsGwtConstants.PARAM_TARGET_LOCALE);
                String apiKey = OpenCms.getSecretStore().getSecret(SECRET_API_KEY);
                if (apiKey == null) {
                    // fail with a meaningful message instead of a bare NullPointerException
                    throw new IllegalStateException(
                        "No DeepL API key available in the secret store under '" + SECRET_API_KEY + "'.");
                }
                apiKey = apiKey.trim();
                Locale targetLocale = CmsLocaleManager.getLocale(targetLocaleParam);

                DeepLClientOptions options = new DeepLClientOptions();
                DeepLClient client = new DeepLClient(apiKey, options);

                String html = CmsTranslationUtil.getWaitMessage(wpLocale);
                context.progress(html);

                List<Language> sourceLanguages = getSourceLanguages(client);
                List<Language> targetLanguages = getTargetLanguages(client);
                Language srcLang = getMatchingLanguage(sourceLocale, sourceLanguages);
                Language targetLang = getMatchingLanguage(
                    getTargetLocaleMapping().getOrDefault(targetLocale, targetLocale),
                    targetLanguages);

                List<I_CmsXmlContentValue> values = CmsTranslationUtil.getValuesToTranslate(
                    cms,
                    content,
                    sourceLocale,
                    targetLocale);

                // Only complain about a missing target language if there actually is something to translate
                if (!values.isEmpty() && (targetLang == null)) {
                    throw new IllegalStateException(
                        "No DeepL target language found for locale '" + targetLocale + "'.");
                }

                // Collect *all* translation results (value path -> translated text) before touching the content
                Map<String, String> valuesToSet = translateValues(cms, client, values, srcLang, targetLang);

                int numTranslatedFields = writeTranslations(cms, content, sourceLocale, targetLocale, valuesToSet);

                if (numTranslatedFields > 0) {
                    context.setResult(content);
                    context.setHtmlMessage(buildFeedbackHtml(cms, sourceLocale, targetLocale, numTranslatedFields));
                    context.setNextLocale(targetLocale);
                } else {
                    String nothingTranslated = Messages.get().getBundle(wpLocale).key(
                        Messages.GUI_TRANSLATION_NOTHING_TRANSLATED_0);
                    context.setHtmlMessage("<p>" + nothingTranslated + "</p>");
                }
            }
        };
    }

    /**
     * Nothing to do here; the configuration parameters are evaluated lazily.
     *
     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#initConfiguration()
     */
    @Override
    public void initConfiguration() throws CmsConfigurationException {

        // nothing to do
    }

    /**
     * Initializes the translator; this implementation needs no initialization.
     *
     * @param cms the CMS context
     */
    @Override
    public void initialize(CmsObject cms) {

        // nothing to do
    }

    /**
     * @see org.opencms.ade.contenteditor.I_CmsContentTranslator#isEnabled(org.opencms.file.CmsObject, org.opencms.ade.configuration.CmsADEConfigData, org.opencms.file.CmsFile)
     */
    @Override
    public boolean isEnabled(CmsObject cms, CmsADEConfigData config, CmsFile file) {

        return OpenCms.getSecretStore().getSecret(SECRET_API_KEY) != null;
    }

    /**
     * Gets (and lazily creates if necessary) the target locale mapping.
     *
     * <p>DeepL API only lists en-US / en-GB for English, and pt-BR / pt-PT for Portuguese as target languages.
     * Using just en or pt might work, but it's probably a good idea to be specific,
     * so if just en or pt is specified, we map it to the variant that's more widely used by default.
     *
     * <p>This can be overridden by the configuration parameter targetLocaleMapping, which has the form l1:r1|l2:r2|..., where the li is
     * the locale to map, and the ri is the locale to map to.
     *
     * @return the map from requested target locales to the locales actually used for the DeepL target language lookup
     */
    protected Map<Locale, Locale> getTargetLocaleMapping() {

        // read the volatile field only once
        Map<Locale, Locale> result = m_targetLocaleMapping;
        if (result == null) {
            result = new HashMap<>();
            result.put(Locale.ENGLISH, Locale.US);
            result.put(new Locale("pt"), Locale.forLanguageTag("pt-PT"));
            String localeMappingStr = m_params.get(PARAM_TARGET_LOCALE_MAPPING);
            if (!CmsStringUtil.isEmptyOrWhitespaceOnly(localeMappingStr)) {
                Map<String, String> mappings = CmsStringUtil.splitAsMap(localeMappingStr.trim(), "|", ":");
                for (Map.Entry<String, String> entry : mappings.entrySet()) {
                    result.put(LocaleUtils.toLocale(entry.getKey()), LocaleUtils.toLocale(entry.getValue()));
                }
            }
            m_targetLocaleMapping = result;
        }
        return result;
    }

    /**
     * Builds the HTML for the feedback screen.
     *
     * @param cms the CMS context
     * @param sourceLocale the translation source locale
     * @param targetLocale the translation target locale
     * @param numSuccessfulFieldUpdates the number of translated fields
     * @return the feedback message, wrapped in a paragraph element
     */
    private String buildFeedbackHtml(
        CmsObject cms,
        Locale sourceLocale,
        Locale targetLocale,
        int numSuccessfulFieldUpdates) {

        Locale wpLocale = OpenCms.getWorkplaceManager().getWorkplaceLocale(cms);
        String message = Messages.get().getBundle(wpLocale).key(
            Messages.GUI_TRANSLATION_FEEDBACK_3,
            numSuccessfulFieldUpdates,
            sourceLocale.getDisplayName(wpLocale),
            targetLocale.getDisplayName(wpLocale));
        return "<p>" + message + "</p>";
    }

    /**
     * Gets the available source languages from the client, but caches them for later calls.
     *
     * @param client the DeepL client
     * @return the available source languages
     */
    private List<Language> getSourceLanguages(DeepLClient client) {

        List<Language> result = m_sourceLanguages;
        if (result == null) {
            try {
                result = client.getSourceLanguages();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // keep the interrupt status visible to the caller
                    Thread.currentThread().interrupt();
                }
                throw new RuntimeException(e);
            }
            m_sourceLanguages = result;
        }
        return result;
    }

    /**
     * Gets the available target languages from the DeepL client, but caches them for later calls.
     *
     * @param client the DeepL client
     * @return the available target languages
     */
    private List<Language> getTargetLanguages(DeepLClient client) {

        List<Language> result = m_targetLanguages;
        if (result == null) {
            try {
                result = client.getTargetLanguages();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // keep the interrupt status visible to the caller
                    Thread.currentThread().interrupt();
                }
                throw new RuntimeException(e);
            }
            m_targetLanguages = result;
        }
        return result;
    }

    /**
     * Gets the value to translate, which in the case of HTML values is the value with unexpanded macros, and the normal
     * string value otherwise.
     *
     * @param cms the CMS context
     * @param value an XML content value
     *
     * @return the value to translate
     */
    private String getTranslationValue(CmsObject cms, I_CmsXmlContentValue value) {

        if (value instanceof CmsXmlHtmlValue) {
            return ((CmsXmlHtmlValue)value).getRawContent();
        }
        return value.getStringValue(cms);
    }

    /**
     * Sets the translated value, which in the case of HTML values sets just the content node of the value (with unexpanded macros), and sets the value normally otherwise.
     *
     * @param cms the CMS context
     * @param value the value to modify
     * @param newValue the translated text to write into the value
     */
    private void setTranslationValue(CmsObject cms, I_CmsXmlContentValue value, String newValue) {

        if (value instanceof CmsXmlHtmlValue) {
            ((CmsXmlHtmlValue)value).setRawContent(newValue);
        } else {
            value.setStringValue(cms, newValue);
        }
    }

    /**
     * Splits the given values into batches which can be translated with one API call each.
     *
     * <p>A batch is closed as soon as adding the next value would exceed {@link #MAX_BATCH_BYTES} or
     * {@link #MAX_BATCH_TEXTS}. A single value larger than the byte limit still gets a batch of its own.
     * No empty batches are produced.
     *
     * @param cms the CMS context
     * @param values the values to split
     * @return the list of non-empty batches, in the iteration order of the input
     */
    private List<List<I_CmsXmlContentValue>> splitIntoBatches(
        CmsObject cms,
        Collection<I_CmsXmlContentValue> values) {

        List<List<I_CmsXmlContentValue>> batches = new ArrayList<>();
        List<I_CmsXmlContentValue> currentBatch = new ArrayList<>();
        int currentBatchBytes = 0;
        for (I_CmsXmlContentValue value : values) {
            int byteSize = getTranslationValue(cms, value).getBytes(StandardCharsets.UTF_8).length;
            boolean batchFull = (currentBatch.size() >= MAX_BATCH_TEXTS)
                || ((currentBatchBytes + byteSize) > MAX_BATCH_BYTES);
            if (batchFull && !currentBatch.isEmpty()) {
                batches.add(currentBatch);
                currentBatch = new ArrayList<>();
                currentBatchBytes = 0;
            }
            currentBatch.add(value);
            currentBatchBytes += byteSize;
        }
        if (!currentBatch.isEmpty()) {
            batches.add(currentBatch);
        }
        return batches;
    }

    /**
     * Translates the given content values using the DeepL client.
     *
     * <p>DeepL can translate multiple strings in one API call. But the total request size is limited,
     * and also all strings in that API call are translated with the same translation options.
     * So we first group the values to be translated by whether they are HTML values or not
     * (This is because when we use HTML mode for non-HTML values, it may replace &lt;, &gt; with entities.).
     * Then we split each of these two groups into smaller batches which can be translated in one API call each.
     *
     * @param cms the CMS context
     * @param client the DeepL client
     * @param values the values to translate
     * @param srcLang the source language (passed to the client as is)
     * @param targetLang the target language
     * @return a map from value paths to the translated texts
     * @throws Exception if a translation request fails
     */
    private Map<String, String> translateValues(
        CmsObject cms,
        DeepLClient client,
        List<I_CmsXmlContentValue> values,
        Language srcLang,
        Language targetLang)
    throws Exception {

        Multimap<Boolean, I_CmsXmlContentValue> valuesByType = ArrayListMultimap.create();
        for (I_CmsXmlContentValue value : values) {
            valuesByType.put(Boolean.valueOf(value instanceof CmsXmlHtmlValue), value);
        }

        Map<String, String> translations = new HashMap<>();
        for (Boolean isHtml : Arrays.asList(Boolean.FALSE, Boolean.TRUE)) {
            Collection<I_CmsXmlContentValue> valuesForCurrentMode = valuesByType.get(isHtml);
            if (valuesForCurrentMode.isEmpty()) {
                continue;
            }
            TextTranslationOptions translationOptions = new TextTranslationOptions();
            if (isHtml.booleanValue()) {
                translationOptions.setTagHandlingVersion("v2");
                translationOptions.setTagHandling("html");
            }
            for (List<I_CmsXmlContentValue> batch : splitIntoBatches(cms, valuesForCurrentMode)) {
                List<String> inputs = batch.stream().map(val -> getTranslationValue(cms, val)).collect(
                    Collectors.toList());
                List<TextResult> outputs = client.translateText(inputs, srcLang, targetLang, translationOptions);
                // results are matched to the inputs by position
                for (int i = 0; i < batch.size(); i++) {
                    translations.put(batch.get(i).getPath(), outputs.get(i).getText());
                }
            }
        }
        return translations;
    }

    /**
     * Writes the translated texts into the target locale of the content.
     *
     * <p>If the target locale does not exist yet, it is created as a copy of the source locale and every translated
     * value is written. Otherwise only values which had to be created or which are currently empty are written, so
     * existing content in the target locale is not overwritten; failures for individual values are logged and skipped.
     *
     * @param cms the CMS context
     * @param content the content to modify
     * @param sourceLocale the translation source locale
     * @param targetLocale the translation target locale
     * @param translations the translated texts, keyed by value path
     * @return the number of values written
     * @throws Exception if creating the target locale or writing to the freshly copied locale fails
     */
    private int writeTranslations(
        CmsObject cms,
        CmsXmlContent content,
        Locale sourceLocale,
        Locale targetLocale,
        Map<String, String> translations)
    throws Exception {

        int numTranslatedFields = 0;
        if (!content.hasLocale(targetLocale)) {
            content.copyLocale(sourceLocale, targetLocale);
            for (Map.Entry<String, String> entry : translations.entrySet()) {
                I_CmsXmlContentValue targetValue = content.getValue(entry.getKey(), targetLocale);
                setTranslationValue(cms, targetValue, entry.getValue());
                numTranslatedFields += 1;
            }
            return numTranslatedFields;
        }

        for (Map.Entry<String, String> entry : translations.entrySet()) {
            I_CmsXmlContentValue origValue = content.getValue(entry.getKey(), sourceLocale);
            try {
                FoundOrCreatedValue val = CmsTranslationUtil.findOrCreateValue(
                    cms,
                    content,
                    targetLocale,
                    origValue.getPath());
                // If the value already existed, we only want to write to it if it's empty.
                // But if it was just created, it might have a default value, which we need to overwrite.
                if (val.wasCreated() || CmsStringUtil.isEmptyOrWhitespaceOnly(val.getValue().getStringValue(cms))) {
                    if (val.getValue() instanceof CmsXmlHtmlValue) {
                        // for HTML values, we need to copy the old value first so the link table is filled
                        val.getValue().setStringValue(cms, origValue.getStringValue(cms));
                    }
                    setTranslationValue(cms, val.getValue(), entry.getValue());
                    numTranslatedFields += 1;
                }
            } catch (Exception e) {
                // best effort: a value which can not be written is skipped, the remaining values are still processed
                LOG.debug(e.getLocalizedMessage(), e);
            }
        }
        return numTranslatedFields;
    }

}