001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (https://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: https://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: https://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.ai;
029
030import org.opencms.ade.configuration.CmsADEConfigData;
031import org.opencms.ade.contenteditor.I_CmsContentTranslator;
032import org.opencms.ai.CmsTranslationUtil.FoundOrCreatedValue;
033import org.opencms.configuration.CmsConfigurationException;
034import org.opencms.configuration.CmsParameterConfiguration;
035import org.opencms.file.CmsFile;
036import org.opencms.file.CmsObject;
037import org.opencms.gwt.shared.CmsGwtConstants;
038import org.opencms.i18n.CmsLocaleManager;
039import org.opencms.main.CmsLog;
040import org.opencms.main.OpenCms;
041import org.opencms.util.CmsStringUtil;
042import org.opencms.xml.content.CmsXmlContent;
043import org.opencms.xml.content.I_CmsXmlContentAugmentation;
044import org.opencms.xml.types.CmsXmlHtmlValue;
045import org.opencms.xml.types.I_CmsXmlContentValue;
046
047import java.nio.charset.StandardCharsets;
048import java.util.ArrayList;
049import java.util.Arrays;
050import java.util.Collection;
051import java.util.HashMap;
052import java.util.List;
053import java.util.Locale;
054import java.util.Map;
055import java.util.stream.Collectors;
056
057import org.apache.commons.lang3.LocaleUtils;
058import org.apache.commons.logging.Log;
059
060import com.deepl.api.DeepLClient;
061import com.deepl.api.DeepLClientOptions;
062import com.deepl.api.Language;
063import com.deepl.api.TextResult;
064import com.deepl.api.TextTranslationOptions;
065import org.opencms.ai.*;
066import com.google.common.collect.ArrayListMultimap;
067import com.google.common.collect.Multimap;
068
069public class CmsDeeplTranslation implements I_CmsContentTranslator {
070
071    /** Key used for looking up the API key in the secret store. */
072    public static final String SECRET_API_KEY = "contenteditor.translation.deepl.apiKey";
073
074    /** The logger instance for this class. */
075    private static final Log LOG = CmsLog.getLog(CmsDeeplTranslation.class);
076
077    /** Configuration parameter for determining a target locale mapping. */
078    protected static final Object PARAM_TARGET_LOCALE_MAPPING = "targetLocaleMapping";
079
080    /** Cached available target languages. */
081    private volatile List<Language> m_targetLanguages;
082
083    /** Cached available source languages. */
084    private volatile List<Language> m_sourceLanguages;
085
086    /** The parameters from the configuration. */
087    private CmsParameterConfiguration m_params = new CmsParameterConfiguration();
088
089    /** The parsed target locale mapping. */
090    private volatile Map<Locale, Locale> m_targetLocaleMapping;
091
092    /**
093     * Tries to find a suitable Language object from a list of DeepL-supported languages that matches a given locale.
094     *
095     * @param locale the locale
096     * @param languages the languages
097     * @return the matching language
098     */
099    public static Language getMatchingLanguage(Locale locale, List<Language> languages) {
100
101        for (Language language : languages) {
102            if (Locale.forLanguageTag(language.getCode()).equals(locale)) {
103                return language;
104            }
105        }
106        // if not found, fall back to just using the language portion
107        for (Language language : languages) {
108            Locale l1 = new Locale(Locale.forLanguageTag(language.getCode()).getLanguage());
109            Locale l2 = new Locale(locale.getLanguage());
110            if (l1.equals(l2)) {
111                return language;
112            }
113        }
114        return null;
115    }
116
117    /**
118     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#addConfigurationParameter(java.lang.String, java.lang.String)
119     */
120    @Override
121    public void addConfigurationParameter(String paramName, String paramValue) {
122
123        m_params.add(paramName, paramValue);
124
125    }
126
127    /**
128     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#getConfiguration()
129     */
130    @Override
131    public CmsParameterConfiguration getConfiguration() {
132
133        return m_params;
134    }
135
136    /**
137     * @see org.opencms.ade.contenteditor.I_CmsContentTranslator#getContentAugmentation()
138     */
139    @Override
140    public I_CmsXmlContentAugmentation getContentAugmentation() {
141
142        return new I_CmsXmlContentAugmentation() {
143
144            @Override
145            public void augmentContent(Context context) throws Exception {
146
147                CmsObject cms = context.getCmsObject();
148                final Locale wpLocale = OpenCms.getWorkplaceManager().getWorkplaceLocale(cms);
149                CmsXmlContent content = context.getContent();
150                Locale sourceLocale = context.getLocale();
151                String targetLocaleParam = context.getParameter(CmsGwtConstants.PARAM_TARGET_LOCALE);
152                String apiKey = OpenCms.getSecretStore().getSecret(SECRET_API_KEY);
153                apiKey = apiKey.trim();
154                Locale targetLocale = CmsLocaleManager.getLocale(targetLocaleParam);
155
156                DeepLClientOptions options = new DeepLClientOptions();
157                DeepLClient client = new DeepLClient(apiKey, options);
158
159                String html = CmsTranslationUtil.getWaitMessage(wpLocale);
160                context.progress(html);
161
162                List<Language> sourceLanguages = getSourceLanguages(client);
163                List<Language> targetLanguages = getTargetLanguages(client);
164                Language srcLang = getMatchingLanguage(sourceLocale, sourceLanguages);
165                Language targetLang = getMatchingLanguage(
166                    getTargetLocaleMapping().getOrDefault(targetLocale, targetLocale),
167                    targetLanguages);
168
169                List<I_CmsXmlContentValue> values = CmsTranslationUtil.getValuesToTranslate(
170                    cms,
171                    content,
172                    sourceLocale,
173                    targetLocale);
174
175                // DeepL can translate multiple strings in one API call. But the total request size is limited,
176                // and also all strings in that API call are translated with the same translation options.
177                // So we first group the values to be translated by whether they are HTML values or not
178                // (This is because when we use HTML mode for non-HTML values, it may replace <, > with entities.).
179                // Then we split each of these two groups into smaller batches which can be translated in one API call each.
180
181                Multimap<Boolean, I_CmsXmlContentValue> valuesByType = ArrayListMultimap.create();
182                for (I_CmsXmlContentValue value : values) {
183                    valuesByType.put(value instanceof CmsXmlHtmlValue, value);
184                }
185
186                // Collect *all* translation results in this map
187                Map<String, String> valuesToSet = new HashMap<>();
188
189                for (Boolean isHtml : Arrays.asList(Boolean.FALSE, Boolean.TRUE)) {
190                    TextTranslationOptions translationOptions = new TextTranslationOptions();
191                    if (isHtml) {
192                        translationOptions.setTagHandlingVersion("v2");
193                        translationOptions.setTagHandling("html");
194                    }
195                    Collection<I_CmsXmlContentValue> valuesForCurrentMode = valuesByType.get(isHtml);
196                    if (valuesForCurrentMode.isEmpty()) {
197                        continue;
198                    }
199                    List<List<I_CmsXmlContentValue>> batches = new ArrayList<>();
200
201                    // Max request size for the API is 128K, but this also includes JSON punctuation and additional parameters, so we leave a wide safety margin
202                    final int limit = 15;
203                    int currentBatchSize = 0;
204                    List<I_CmsXmlContentValue> currentBatch = new ArrayList<>();
205                    batches.add(currentBatch);
206                    for (I_CmsXmlContentValue val : valuesForCurrentMode) {
207                        String strValue = getTranslationValue(cms, val);
208                        long byteSize = strValue.getBytes(StandardCharsets.UTF_8).length;
209                        if ((byteSize + currentBatchSize) > limit) {
210                            currentBatch = new ArrayList<>();
211                            batches.add(currentBatch);
212                            currentBatchSize = 0;
213                        }
214                        currentBatch.add(val);
215                        currentBatchSize += byteSize;
216                    }
217
218                    for (List<I_CmsXmlContentValue> batch : batches) {
219                        if (batch.size() == 0) {
220                            continue;
221                        }
222
223                        List<String> inputs = batch.stream().map(val -> getTranslationValue(cms, val)).collect(
224                            Collectors.toList());
225                        List<TextResult> outputs = client.translateText(
226                            inputs,
227                            srcLang,
228                            targetLang,
229                            translationOptions);
230                        for (int i = 0; i < batch.size(); i++) {
231                            valuesToSet.put(batch.get(i).getPath(), outputs.get(i).getText());
232                        }
233                    }
234                }
235                int numTranslatedFields = 0;
236
237                if (!content.hasLocale(targetLocale)) {
238                    content.copyLocale(sourceLocale, targetLocale);
239                    for (Map.Entry<String, String> entry : valuesToSet.entrySet()) {
240                        I_CmsXmlContentValue targetValue = content.getValue(entry.getKey(), targetLocale);
241                        setTranslationValue(cms, targetValue, entry.getValue());
242                        numTranslatedFields += 1;
243                    }
244                } else {
245                    for (Map.Entry<String, String> entry : valuesToSet.entrySet()) {
246                        I_CmsXmlContentValue origValue = content.getValue(entry.getKey(), sourceLocale);
247                        try {
248                            FoundOrCreatedValue val = CmsTranslationUtil.findOrCreateValue(
249                                cms,
250                                content,
251                                targetLocale,
252                                origValue.getPath());
253                            // If the value already existed, we only want to write to it if it's empty.
254                            // But if it was just created, it might have a default value, which we need to overwrite.
255                            if (val.wasCreated()
256                                || CmsStringUtil.isEmptyOrWhitespaceOnly(val.getValue().getStringValue(cms))) {
257                                if (val.getValue() instanceof CmsXmlHtmlValue) {
258                                    // for HTML values, we need to copy the old value first so the link table is filled
259                                    val.getValue().setStringValue(cms, origValue.getStringValue(cms));
260                                }
261                                setTranslationValue(cms, val.getValue(), entry.getValue());
262                                numTranslatedFields += 1;
263                            }
264                        } catch (Exception e) {
265                            LOG.debug(e.getLocalizedMessage(), e);
266                        }
267                    }
268                }
269
270                if (numTranslatedFields > 0) {
271                    context.setResult(content);
272                    context.setHtmlMessage(buildFeedbackHtml(cms, sourceLocale, targetLocale, numTranslatedFields));
273                    context.setNextLocale(targetLocale);
274                } else {
275                    String nothingTranslated = Messages.get().getBundle(wpLocale).key(
276                        Messages.GUI_TRANSLATION_NOTHING_TRANSLATED_0);
277                    context.setHtmlMessage("<p>" + nothingTranslated + "</p>");
278                }
279
280            }
281
282        };
283
284    }
285
286    /**
287     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#initConfiguration()
288     */
289    @Override
290    public void initConfiguration() throws CmsConfigurationException {
291
292    }
293
294    @Override
295    public void initialize(CmsObject cms) {
296
297    }
298
299    /**
300     * @see org.opencms.ade.contenteditor.I_CmsContentTranslator#isEnabled(org.opencms.file.CmsObject, org.opencms.ade.configuration.CmsADEConfigData, org.opencms.file.CmsFile)
301     */
302    @Override
303    public boolean isEnabled(CmsObject cms, CmsADEConfigData config, CmsFile file) {
304
305        return OpenCms.getSecretStore().getSecret(SECRET_API_KEY) != null;
306
307    }
308
309    /**
310     * Gets (and lazily creates if necessary) the target locale mapping.
311     *
312     * <p>DeepL API only lists en-US / en-GB  for English, and pt-BR / pt-PT for Portuguese as target languages.
313     * Using just en or pt might work, but it's probably a good idea to be specific,
314     * So if just en or pt is specified, we map it to the variant that's more widely used by default.
315     *
316     * <p>This can be overriden by the configuration parameter targetLocaleMapping, which has the form l1:r1|l2:r2|..., where the li is
317     * the locale to map, and the ri is the locale to map to.
318     * @return
319     */
320    protected Map<Locale, Locale> getTargetLocaleMapping() {
321
322        if (m_targetLocaleMapping == null) {
323            Map<Locale, Locale> targetLocaleMapping = new HashMap<>();
324            targetLocaleMapping.put(Locale.ENGLISH, Locale.US);
325            targetLocaleMapping.put(new Locale("pt"), Locale.forLanguageTag("pt-PT"));
326            String localeMappingStr = m_params.get(PARAM_TARGET_LOCALE_MAPPING);
327            if (!CmsStringUtil.isEmptyOrWhitespaceOnly(localeMappingStr)) {
328                Map<String, String> mappings = CmsStringUtil.splitAsMap(localeMappingStr.trim(), "|", ":");
329                for (Map.Entry<String, String> entry : mappings.entrySet()) {
330                    targetLocaleMapping.put(
331                        LocaleUtils.toLocale(entry.getKey()),
332                        LocaleUtils.toLocale(entry.getValue()));
333                }
334            }
335            m_targetLocaleMapping = targetLocaleMapping;
336        }
337
338        return m_targetLocaleMapping;
339    }
340
341    /**
342     * Builds the HTML for the feedback screen.
343     *
344     * @param cms the CMS context
345     * @param sourceLocale the translation source locale
346     * @param targetLocale the translation target locale
347     * @param numSuccessfulFieldUpdates the number of translated fields
348     * @param conflictFields the list of fields with conflicts
349     * @return
350     */
351    private String buildFeedbackHtml(
352        CmsObject cms,
353        Locale sourceLocale,
354        Locale targetLocale,
355        int numSuccessfulFieldUpdates) {
356
357        StringBuilder buffer = new StringBuilder();
358        Locale wpLocale = OpenCms.getWorkplaceManager().getWorkplaceLocale(cms);
359        buffer.append("<p>");
360        buffer.append(
361            Messages.get().getBundle(wpLocale).key(
362                Messages.GUI_TRANSLATION_FEEDBACK_3,
363                numSuccessfulFieldUpdates,
364                sourceLocale.getDisplayName(wpLocale),
365                targetLocale.getDisplayName(wpLocale)));
366        buffer.append("</p>");
367        return buffer.toString();
368    }
369
370    /**
371     * Gets the available source languages from the client, but caches them for later calls.
372     *
373     * @param client the DeepL client
374     * @return the available source languages
375     */
376    private List<Language> getSourceLanguages(DeepLClient client) {
377
378        if (m_sourceLanguages == null) {
379            try {
380                m_sourceLanguages = client.getSourceLanguages();
381            } catch (Exception e) {
382                throw new RuntimeException(e);
383            }
384        }
385        return m_sourceLanguages;
386    }
387
388    /**
389     * Gets the available target languages from the DeepL client, but caches them for later calls
390     *
391     * @param client the DeepL client
392     * @return the available target languages
393     */
394    private List<Language> getTargetLanguages(DeepLClient client) {
395
396        if (m_targetLanguages == null) {
397            try {
398                m_targetLanguages = client.getTargetLanguages();
399            } catch (Exception e) {
400                throw new RuntimeException(e);
401            }
402        }
403        return m_targetLanguages;
404
405    }
406
407    /**
408     * Gets the value to translate, which in the case of HTML values is the value with unexpanded macros, and the normal string value otherwise.
409     *
410     * @param cms the CMS context
411     * @param value an XML content value
412     *
413     * @return the value to translate
414     */
415    private String getTranslationValue(CmsObject cms, I_CmsXmlContentValue value) {
416
417        if (value instanceof CmsXmlHtmlValue) {
418            return ((CmsXmlHtmlValue)value).getRawContent();
419        } else {
420            return value.getStringValue(cms);
421        }
422    }
423
424    /**
425     * Sets the translated value, which in the case of HTML values sets just the content node of the value (with unexpanded macros), and sets the value normally otherwise.
426     *
427     * @param cms the CMS context
428     * @param value the value to modify
429     * @param newValue
430     * @param newValue
431     */
432    private void setTranslationValue(CmsObject cms, I_CmsXmlContentValue value, String newValue) {
433
434        if (value instanceof CmsXmlHtmlValue) {
435            ((CmsXmlHtmlValue)value).setRawContent(newValue);
436        } else {
437            value.setStringValue(cms, newValue);
438        }
439
440    }
441
442}