001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search.solr.spellchecking;
029
030import org.opencms.file.CmsObject;
031import org.opencms.json.JSONArray;
032import org.opencms.json.JSONException;
033import org.opencms.json.JSONObject;
034import org.opencms.main.CmsLog;
035import org.opencms.main.OpenCms;
036import org.opencms.search.Messages;
037import org.opencms.security.CmsPermissionViolationException;
038import org.opencms.security.CmsRole;
039import org.opencms.security.CmsRoleViolationException;
040
041import java.io.IOException;
042import java.io.PrintWriter;
043import java.util.ArrayList;
044import java.util.LinkedList;
045import java.util.List;
046import java.util.Map;
047import java.util.StringTokenizer;
048
049import javax.servlet.ServletRequest;
050import javax.servlet.http.HttpServletResponse;
051
052import org.apache.commons.logging.Log;
053import org.apache.solr.client.solrj.SolrClient;
054import org.apache.solr.client.solrj.SolrQuery;
055import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
056import org.apache.solr.client.solrj.response.QueryResponse;
057import org.apache.solr.client.solrj.response.SpellCheckResponse;
058import org.apache.solr.client.solrj.response.SpellCheckResponse.Suggestion;
059import org.apache.solr.common.params.ModifiableSolrParams;
060import org.apache.solr.core.CoreContainer;
061import org.apache.solr.core.SolrCore;
062
063/**
064 * CmsSolrSpellchecker is used to perform spellchecking in OpenCms by using Solr. The JSON-formatted result of the
065 * spellchecking operation contains suggestions for misspelled words and is compatible with the expected structure
066 * of the tinyMCE editor.
067 */
068public final class CmsSolrSpellchecker {
069
070    /** The spellcheck core name. */
071    public static final String SPELLCHECKER_INDEX_CORE = "spellcheck";
072
073    /** Logging facility for this class. */
074    private static final Log LOG = CmsLog.getLog(CmsSolrSpellchecker.class);
075
076    /** The singleton instance of this class. */
077    private static CmsSolrSpellchecker instance;
078
079    /** Constant, defining the default spellchecker language. */
080    private static final String LANG_DEFAULT = "en";
081
082    /** Constant, defining the JSON 'id'-field key. */
083    private static final String JSON_ID = "id";
084
085    /** Constant, defining the JSON 'lang'-field key. */
086    private static final String JSON_LANG = "lang";
087
088    /** Constant, defining the JSON 'error'-field key. */
089    private static final String JSON_ERROR = "error";
090
091    /** Constant, defining the JSON 'words'-field key. */
092    private static final String JSON_WORDS = "words";
093
094    /** Constant, defining the JSON 'params'-field key. */
095    private static final String JSON_PARAMS = "params";
096
097    /** Constant, defining the JSON 'result'-field key. */
098    private static final String JSON_RESULT = "result";
099
100    /** Constant, defining the parameter name containing the words. */
101    private static final String HTTP_PARAMETER_WORDS = "words";
102
103    /** Constant, defining the parameter name containing the language. */
104    private static final String HTTP_PARAMETER_LANG = "lang";
105
106    /** Constant, defining the parameter name used to force rebuild the index. */
107    private static final String HTTP_PARAMTER_REBUILD = "rebuild";
108
109    /** Constant, defining the parameter name used to check and rebuild the index. */
110    private static final String HTTP_PARAMETER_CHECKREBUILD = "check";
111
112    /** The SolrCore object. */
113    private SolrCore m_core;
114
115    /** The Solr CoreContainer object. */
116    private CoreContainer m_coreContainer;
117
118    /** The SolrClient object. */
119    private SolrClient m_solrClient;
120
121    /**
122     * Private constructor due to usage of the Singleton pattern.
123     *
124     * @param container Solr CoreContainer container object.
125     * @param core The Solr Core object.
126     */
127    private CmsSolrSpellchecker(CoreContainer container, SolrCore core) {
128
129        if ((null == container) || (null == core)) {
130            throw new IllegalArgumentException();
131        }
132
133        m_core = core;
134        m_coreContainer = container;
135        m_solrClient = new EmbeddedSolrServer(m_coreContainer, m_core.getName());
136    }
137
138    /**
139     * Return an instance of this class.
140     *
141     * @return instance of CmsSolrSpellchecker
142     */
143    public static CmsSolrSpellchecker getInstance() {
144
145        return instance;
146    }
147
148    /**
149     * Return an instance of this class.
150     *
151     * @param container Solr CoreContainer container object in order to create a server object.
152     *
153     * @return instance of CmsSolrSpellchecker
154     */
155    public static CmsSolrSpellchecker getInstance(CoreContainer container) {
156
157        if (null == instance) {
158            synchronized (CmsSolrSpellchecker.class) {
159                if (null == instance) {
160                    @SuppressWarnings("resource")
161                    SolrCore spellcheckCore = container.getCore(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE);
162                    if (spellcheckCore == null) {
163                        LOG.error(
164                            Messages.get().getBundle().key(
165                                Messages.ERR_SPELLCHECK_CORE_NOT_AVAILABLE_1,
166                                CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE));
167                        return null;
168                    }
169                    instance = new CmsSolrSpellchecker(container, spellcheckCore);
170                }
171            }
172        }
173
174        return instance;
175    }
176
177    /**
178     * Performs spellchecking using Solr and returns the spellchecking results using JSON.
179     *
180     * @param res The HttpServletResponse object.
181     * @param servletRequest The ServletRequest object.
182     * @param cms The CmsObject object.
183     *
184     * @throws CmsPermissionViolationException in case of the anonymous guest user
185     * @throws IOException if writing the response fails
186     */
187    public void getSpellcheckingResult(
188        final HttpServletResponse res,
189        final ServletRequest servletRequest,
190        final CmsObject cms)
191    throws CmsPermissionViolationException, IOException {
192
193        // Perform a permission check
194        performPermissionCheck(cms);
195
196        // Set the appropriate response headers
197        setResponeHeaders(res);
198
199        // Figure out whether a JSON or HTTP request has been sent
200        CmsSpellcheckingRequest cmsSpellcheckingRequest = null;
201        try {
202            String requestBody = getRequestBody(servletRequest);
203            final JSONObject jsonRequest = new JSONObject(requestBody);
204            cmsSpellcheckingRequest = parseJsonRequest(jsonRequest);
205        } catch (Exception e) {
206            LOG.debug(e.getMessage(), e);
207            cmsSpellcheckingRequest = parseHttpRequest(servletRequest, cms);
208        }
209
210        if ((null != cmsSpellcheckingRequest) && cmsSpellcheckingRequest.isInitialized()) {
211            // Perform the actual spellchecking
212            final SpellCheckResponse spellCheckResponse = performSpellcheckQuery(cmsSpellcheckingRequest);
213
214            /*
215             * The field spellCheckResponse is null when exactly one correctly spelled word is passed to the spellchecker.
216             * In this case it's safe to return an empty JSON formatted map, as the passed word is correct. Otherwise,
217             * convert the spellchecker response into a new JSON formatted map.
218             */
219            if (null == spellCheckResponse) {
220                cmsSpellcheckingRequest.m_wordSuggestions = new JSONObject();
221            } else {
222                cmsSpellcheckingRequest.m_wordSuggestions = getConvertedResponseAsJson(spellCheckResponse);
223            }
224        }
225
226        // Send response back to the client
227        sendResponse(res, cmsSpellcheckingRequest);
228    }
229
230    /**
231     * Parses and adds dictionaries to the Solr index.
232     *
233     * @param cms the OpenCms object.
234     *
235     * @throws CmsRoleViolationException in case the user does not have the required role ROOT_ADMIN
236     */
237    public void parseAndAddDictionaries(CmsObject cms) throws CmsRoleViolationException {
238
239        OpenCms.getRoleManager().checkRole(cms, CmsRole.ROOT_ADMIN);
240        CmsSpellcheckDictionaryIndexer.parseAndAddZippedDictionaries(m_solrClient, cms);
241        CmsSpellcheckDictionaryIndexer.parseAndAddDictionaries(m_solrClient, cms);
242    }
243
244    /**
245     * Converts the suggestions from the Solrj format to JSON format.
246     *
247     * @param response The SpellCheckResponse object containing the spellcheck results.
248     * @return The spellcheck suggestions as JSON object or null if something goes wrong.
249     */
250    private JSONObject getConvertedResponseAsJson(SpellCheckResponse response) {
251
252        if (null == response) {
253            return null;
254        }
255
256        final JSONObject suggestions = new JSONObject();
257        final Map<String, Suggestion> solrSuggestions = response.getSuggestionMap();
258
259        // Add suggestions to the response
260        for (final String key : solrSuggestions.keySet()) {
261
262            // Indicator to ignore words that are erroneously marked as misspelled.
263            boolean ignoreWord = false;
264
265            // Suggestions that are in the form "Xxxx" -> "xxxx" should be ignored.
266            if (Character.isUpperCase(key.codePointAt(0))) {
267                final String lowercaseKey = key.toLowerCase();
268                // If the suggestion map doesn't contain the lowercased word, ignore this entry.
269                if (!solrSuggestions.containsKey(lowercaseKey)) {
270                    ignoreWord = true;
271                }
272            }
273
274            if (!ignoreWord) {
275                try {
276                    // Get suggestions as List
277                    final List<String> l = solrSuggestions.get(key).getAlternatives();
278                    suggestions.put(key, l);
279                } catch (JSONException e) {
280                    LOG.debug("Exception while converting Solr spellcheckresponse to JSON. ", e);
281                }
282            }
283        }
284
285        return suggestions;
286    }
287
288    /**
289     * Returns the result of the performed spellcheck formatted in JSON.
290     *
291     * @param request The CmsSpellcheckingRequest.
292     * @return JSONObject that contains the result of the performed spellcheck.
293     */
294    private JSONObject getJsonFormattedSpellcheckResult(CmsSpellcheckingRequest request) {
295
296        final JSONObject response = new JSONObject();
297
298        try {
299            if (null != request.m_id) {
300                response.put(JSON_ID, request.m_id);
301            }
302
303            response.put(JSON_RESULT, request.m_wordSuggestions);
304
305        } catch (Exception e) {
306            try {
307                response.put(JSON_ERROR, true);
308                LOG.debug("Error while assembling spellcheck response in JSON format.", e);
309            } catch (JSONException ex) {
310                LOG.debug("Error while assembling spellcheck response in JSON format.", ex);
311            }
312        }
313
314        return response;
315    }
316
317    /**
318     * Returns the body of the request. This method is used to read posted JSON data.
319     *
320     * @param request The request.
321     *
322     * @return String representation of the request's body.
323     *
324     * @throws IOException in case reading the request fails
325     */
326    private String getRequestBody(ServletRequest request) throws IOException {
327
328        final StringBuilder sb = new StringBuilder();
329
330        String line = request.getReader().readLine();
331        while (null != line) {
332            sb.append(line);
333            line = request.getReader().readLine();
334        }
335
336        return sb.toString();
337    }
338
339    /**
340     * Parse parameters from this request using HTTP.
341     *
342     * @param req The ServletRequest containing all request parameters.
343     * @param cms The OpenCms object.
344     * @return CmsSpellcheckingRequest object that contains parsed parameters.
345     */
346    private CmsSpellcheckingRequest parseHttpRequest(final ServletRequest req, final CmsObject cms) {
347
348        if ((null != cms) && OpenCms.getRoleManager().hasRole(cms, CmsRole.ROOT_ADMIN)) {
349            try {
350                if (null != req.getParameter(HTTP_PARAMETER_CHECKREBUILD)) {
351                    if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(cms)) {
352
353                        parseAndAddDictionaries(cms);
354
355                    }
356                }
357
358                if (null != req.getParameter(HTTP_PARAMTER_REBUILD)) {
359                    parseAndAddDictionaries(cms);
360                }
361            } catch (CmsRoleViolationException e) {
362                LOG.error(e.getLocalizedMessage(), e);
363            }
364        }
365
366        final String q = req.getParameter(HTTP_PARAMETER_WORDS);
367
368        if (null == q) {
369            LOG.debug("Invalid HTTP request: No parameter \"" + HTTP_PARAMETER_WORDS + "\" defined. ");
370            return null;
371        }
372
373        final StringTokenizer st = new StringTokenizer(q);
374        final List<String> wordsToCheck = new ArrayList<String>();
375        while (st.hasMoreTokens()) {
376            final String word = st.nextToken();
377            wordsToCheck.add(word);
378
379            if (Character.isUpperCase(word.codePointAt(0))) {
380                wordsToCheck.add(word.toLowerCase());
381            }
382        }
383
384        final String[] w = wordsToCheck.toArray(new String[wordsToCheck.size()]);
385        final String dict = req.getParameter(HTTP_PARAMETER_LANG) == null
386        ? LANG_DEFAULT
387        : req.getParameter(HTTP_PARAMETER_LANG);
388
389        return new CmsSpellcheckingRequest(w, dict);
390    }
391
392    /**
393     * Parse JSON parameters from this request.
394     *
395     * @param jsonRequest The request in the JSON format.
396     * @return CmsSpellcheckingRequest object that contains parsed parameters or null, if JSON input is not well
397     * defined.
398     */
399    private CmsSpellcheckingRequest parseJsonRequest(JSONObject jsonRequest) {
400
401        final String id = jsonRequest.optString(JSON_ID);
402
403        final JSONObject params = jsonRequest.optJSONObject(JSON_PARAMS);
404
405        if (null == params) {
406            LOG.debug("Invalid JSON request: No field \"params\" defined. ");
407            return null;
408        }
409        final JSONArray words = params.optJSONArray(JSON_WORDS);
410        final String lang = params.optString(JSON_LANG, LANG_DEFAULT);
411        if (null == words) {
412            LOG.debug("Invalid JSON request: No field \"words\" defined. ");
413            return null;
414        }
415
416        // Convert JSON array to array of type String
417        final List<String> wordsToCheck = new LinkedList<String>();
418        for (int i = 0; i < words.length(); i++) {
419            final String word = words.opt(i).toString();
420            wordsToCheck.add(word);
421
422            if (Character.isUpperCase(word.codePointAt(0))) {
423                wordsToCheck.add(word.toLowerCase());
424            }
425        }
426
427        return new CmsSpellcheckingRequest(wordsToCheck.toArray(new String[wordsToCheck.size()]), lang, id);
428    }
429
430    /**
431     * Perform a security check against OpenCms.
432     *
433     * @param cms The OpenCms object.
434     *
435     * @throws CmsPermissionViolationException in case of the anonymous guest user
436     */
437    private void performPermissionCheck(CmsObject cms) throws CmsPermissionViolationException {
438
439        if (cms.getRequestContext().getCurrentUser().isGuestUser()) {
440            throw new CmsPermissionViolationException(null);
441        }
442    }
443
444    /**
445     * Performs the actual spell check query using Solr.
446     *
447     * @param request the spell check request
448     *
449     * @return Results of the Solr spell check of type SpellCheckResponse or null if something goes wrong.
450     */
451    private SpellCheckResponse performSpellcheckQuery(CmsSpellcheckingRequest request) {
452
453        if ((null == request) || !request.isInitialized()) {
454            return null;
455        }
456
457        final String[] wordsToCheck = request.m_wordsToCheck;
458
459        final ModifiableSolrParams params = new ModifiableSolrParams();
460        params.set("spellcheck", "true");
461        params.set("spellcheck.dictionary", request.m_dictionaryToUse);
462        params.set("spellcheck.extendedResults", "true");
463
464        // Build one string from array of words and use it as query.
465        final StringBuilder builder = new StringBuilder();
466        for (int i = 0; i < wordsToCheck.length; i++) {
467            builder.append(wordsToCheck[i] + " ");
468        }
469
470        params.set("spellcheck.q", builder.toString());
471
472        final SolrQuery query = new SolrQuery();
473        query.setRequestHandler("/spell");
474        query.add(params);
475
476        try {
477            QueryResponse qres = m_solrClient.query(query);
478            return qres.getSpellCheckResponse();
479        } catch (Exception e) {
480            LOG.debug("Exception while performing spellcheck query...", e);
481        }
482
483        return null;
484    }
485
486    /**
487     * Sends the JSON-formatted spellchecking results to the client.
488     *
489     * @param res The HttpServletResponse object.
490     * @param request The spellchecking request object.
491     *
492     * @throws IOException in case writing the response fails
493     */
494    private void sendResponse(final HttpServletResponse res, final CmsSpellcheckingRequest request) throws IOException {
495
496        final PrintWriter pw = res.getWriter();
497        final JSONObject response = getJsonFormattedSpellcheckResult(request);
498        pw.println(response.toString());
499        pw.close();
500    }
501
502    /**
503     * Sets the appropriate headers to response of this request.
504     *
505     * @param response The HttpServletResponse response object.
506     */
507    private void setResponeHeaders(HttpServletResponse response) {
508
509        response.setHeader("Cache-Control", "no-store, no-cache");
510        response.setHeader("Pragma", "no-cache");
511        response.setDateHeader("Expires", System.currentTimeMillis());
512        response.setContentType("text/plain; charset=utf-8");
513        response.setCharacterEncoding("utf-8");
514    }
515}