001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search.solr.spellchecking; 029 030import org.opencms.file.CmsObject; 031import org.opencms.json.JSONArray; 032import org.opencms.json.JSONException; 033import org.opencms.json.JSONObject; 034import org.opencms.main.CmsLog; 035import org.opencms.main.OpenCms; 036import org.opencms.search.Messages; 037import org.opencms.security.CmsPermissionViolationException; 038import org.opencms.security.CmsRole; 039import org.opencms.security.CmsRoleViolationException; 040 041import java.io.IOException; 042import java.io.PrintWriter; 043import java.util.ArrayList; 044import java.util.LinkedList; 045import java.util.List; 046import java.util.Map; 047import java.util.StringTokenizer; 048 049import javax.servlet.ServletRequest; 050import javax.servlet.http.HttpServletResponse; 051 052import org.apache.commons.logging.Log; 053import org.apache.solr.client.solrj.SolrClient; 054import org.apache.solr.client.solrj.SolrQuery; 055import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 056import org.apache.solr.client.solrj.response.QueryResponse; 057import org.apache.solr.client.solrj.response.SpellCheckResponse; 058import org.apache.solr.client.solrj.response.SpellCheckResponse.Suggestion; 059import org.apache.solr.common.params.ModifiableSolrParams; 060import org.apache.solr.core.CoreContainer; 061import org.apache.solr.core.SolrCore; 062 063/** 064 * CmsSolrSpellchecker is used to perform spellchecking in OpenCms by using Solr. The JSON-formatted result of the 065 * spellchecking operation contains suggestions for misspelled words and is compatible with the expected structure 066 * of the tinyMCE editor. 067 */ 068public final class CmsSolrSpellchecker { 069 070 /** The spellcheck core name. */ 071 public static final String SPELLCHECKER_INDEX_CORE = "spellcheck"; 072 073 /** Logging facility for this class. */ 074 private static final Log LOG = CmsLog.getLog(CmsSolrSpellchecker.class); 075 076 /** The singleton instance of this class. */ 077 private static CmsSolrSpellchecker instance; 078 079 /** Constant, defining the default spellchecker language. */ 080 private static final String LANG_DEFAULT = "en"; 081 082 /** Constant, defining the JSON 'id'-field key. */ 083 private static final String JSON_ID = "id"; 084 085 /** Constant, defining the JSON 'lang'-field key. */ 086 private static final String JSON_LANG = "lang"; 087 088 /** Constant, defining the JSON 'error'-field key. */ 089 private static final String JSON_ERROR = "error"; 090 091 /** Constant, defining the JSON 'words'-field key. */ 092 private static final String JSON_WORDS = "words"; 093 094 /** Constant, defining the JSON 'params'-field key. */ 095 private static final String JSON_PARAMS = "params"; 096 097 /** Constant, defining the JSON 'result'-field key. */ 098 private static final String JSON_RESULT = "result"; 099 100 /** Constant, defining the parameter name containing the words. */ 101 private static final String HTTP_PARAMETER_WORDS = "words"; 102 103 /** Constant, defining the parameter name containing the language. */ 104 private static final String HTTP_PARAMETER_LANG = "lang"; 105 106 /** Constant, defining the parameter name used to force rebuild the index. */ 107 private static final String HTTP_PARAMTER_REBUILD = "rebuild"; 108 109 /** Constant, defining the parameter name used to check and rebuild the index. */ 110 private static final String HTTP_PARAMETER_CHECKREBUILD = "check"; 111 112 /** The SolrCore object. */ 113 private SolrCore m_core; 114 115 /** The Solr CoreContainer object. */ 116 private CoreContainer m_coreContainer; 117 118 /** The SolrClient object. */ 119 private SolrClient m_solrClient; 120 121 /** 122 * Private constructor due to usage of the Singleton pattern. 123 * 124 * @param container Solr CoreContainer container object. 125 * @param core The Solr Core object. 126 */ 127 private CmsSolrSpellchecker(CoreContainer container, SolrCore core) { 128 129 if ((null == container) || (null == core)) { 130 throw new IllegalArgumentException(); 131 } 132 133 m_core = core; 134 m_coreContainer = container; 135 m_solrClient = new EmbeddedSolrServer(m_coreContainer, m_core.getName()); 136 } 137 138 /** 139 * Return an instance of this class. 140 * 141 * @return instance of CmsSolrSpellchecker 142 */ 143 public static CmsSolrSpellchecker getInstance() { 144 145 return instance; 146 } 147 148 /** 149 * Return an instance of this class. 150 * 151 * @param container Solr CoreContainer container object in order to create a server object. 152 * 153 * @return instance of CmsSolrSpellchecker 154 */ 155 public static CmsSolrSpellchecker getInstance(CoreContainer container) { 156 157 if (null == instance) { 158 synchronized (CmsSolrSpellchecker.class) { 159 if (null == instance) { 160 @SuppressWarnings("resource") 161 SolrCore spellcheckCore = container.getCore(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE); 162 if (spellcheckCore == null) { 163 LOG.error( 164 Messages.get().getBundle().key( 165 Messages.ERR_SPELLCHECK_CORE_NOT_AVAILABLE_1, 166 CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)); 167 return null; 168 } 169 instance = new CmsSolrSpellchecker(container, spellcheckCore); 170 } 171 } 172 } 173 174 return instance; 175 } 176 177 /** 178 * Performs spellchecking using Solr and returns the spellchecking results using JSON. 179 * 180 * @param res The HttpServletResponse object. 181 * @param servletRequest The ServletRequest object. 182 * @param cms The CmsObject object. 183 * 184 * @throws CmsPermissionViolationException in case of the anonymous guest user 185 * @throws IOException if writing the response fails 186 */ 187 public void getSpellcheckingResult( 188 final HttpServletResponse res, 189 final ServletRequest servletRequest, 190 final CmsObject cms) 191 throws CmsPermissionViolationException, IOException { 192 193 // Perform a permission check 194 performPermissionCheck(cms); 195 196 // Set the appropriate response headers 197 setResponeHeaders(res); 198 199 // Figure out whether a JSON or HTTP request has been sent 200 CmsSpellcheckingRequest cmsSpellcheckingRequest = null; 201 try { 202 String requestBody = getRequestBody(servletRequest); 203 final JSONObject jsonRequest = new JSONObject(requestBody); 204 cmsSpellcheckingRequest = parseJsonRequest(jsonRequest); 205 } catch (Exception e) { 206 LOG.debug(e.getMessage(), e); 207 cmsSpellcheckingRequest = parseHttpRequest(servletRequest, cms); 208 } 209 210 if ((null != cmsSpellcheckingRequest) && cmsSpellcheckingRequest.isInitialized()) { 211 // Perform the actual spellchecking 212 final SpellCheckResponse spellCheckResponse = performSpellcheckQuery(cmsSpellcheckingRequest); 213 214 /* 215 * The field spellCheckResponse is null when exactly one correctly spelled word is passed to the spellchecker. 216 * In this case it's safe to return an empty JSON formatted map, as the passed word is correct. Otherwise, 217 * convert the spellchecker response into a new JSON formatted map. 218 */ 219 if (null == spellCheckResponse) { 220 cmsSpellcheckingRequest.m_wordSuggestions = new JSONObject(); 221 } else { 222 cmsSpellcheckingRequest.m_wordSuggestions = getConvertedResponseAsJson(spellCheckResponse); 223 } 224 } 225 226 // Send response back to the client 227 sendResponse(res, cmsSpellcheckingRequest); 228 } 229 230 /** 231 * Parses and adds dictionaries to the Solr index. 232 * 233 * @param cms the OpenCms object. 234 * 235 * @throws CmsRoleViolationException in case the user does not have the required role ROOT_ADMIN 236 */ 237 public void parseAndAddDictionaries(CmsObject cms) throws CmsRoleViolationException { 238 239 OpenCms.getRoleManager().checkRole(cms, CmsRole.ROOT_ADMIN); 240 CmsSpellcheckDictionaryIndexer.parseAndAddZippedDictionaries(m_solrClient, cms); 241 CmsSpellcheckDictionaryIndexer.parseAndAddDictionaries(m_solrClient, cms); 242 } 243 244 /** 245 * Converts the suggestions from the Solrj format to JSON format. 246 * 247 * @param response The SpellCheckResponse object containing the spellcheck results. 248 * @return The spellcheck suggestions as JSON object or null if something goes wrong. 249 */ 250 private JSONObject getConvertedResponseAsJson(SpellCheckResponse response) { 251 252 if (null == response) { 253 return null; 254 } 255 256 final JSONObject suggestions = new JSONObject(); 257 final Map<String, Suggestion> solrSuggestions = response.getSuggestionMap(); 258 259 // Add suggestions to the response 260 for (final String key : solrSuggestions.keySet()) { 261 262 // Indicator to ignore words that are erroneously marked as misspelled. 263 boolean ignoreWord = false; 264 265 // Suggestions that are in the form "Xxxx" -> "xxxx" should be ignored. 266 if (Character.isUpperCase(key.codePointAt(0))) { 267 final String lowercaseKey = key.toLowerCase(); 268 // If the suggestion map doesn't contain the lowercased word, ignore this entry. 269 if (!solrSuggestions.containsKey(lowercaseKey)) { 270 ignoreWord = true; 271 } 272 } 273 274 if (!ignoreWord) { 275 try { 276 // Get suggestions as List 277 final List<String> l = solrSuggestions.get(key).getAlternatives(); 278 suggestions.put(key, l); 279 } catch (JSONException e) { 280 LOG.debug("Exception while converting Solr spellcheckresponse to JSON. ", e); 281 } 282 } 283 } 284 285 return suggestions; 286 } 287 288 /** 289 * Returns the result of the performed spellcheck formatted in JSON. 290 * 291 * @param request The CmsSpellcheckingRequest. 292 * @return JSONObject that contains the result of the performed spellcheck. 293 */ 294 private JSONObject getJsonFormattedSpellcheckResult(CmsSpellcheckingRequest request) { 295 296 final JSONObject response = new JSONObject(); 297 298 try { 299 if (null != request.m_id) { 300 response.put(JSON_ID, request.m_id); 301 } 302 303 response.put(JSON_RESULT, request.m_wordSuggestions); 304 305 } catch (Exception e) { 306 try { 307 response.put(JSON_ERROR, true); 308 LOG.debug("Error while assembling spellcheck response in JSON format.", e); 309 } catch (JSONException ex) { 310 LOG.debug("Error while assembling spellcheck response in JSON format.", ex); 311 } 312 } 313 314 return response; 315 } 316 317 /** 318 * Returns the body of the request. This method is used to read posted JSON data. 319 * 320 * @param request The request. 321 * 322 * @return String representation of the request's body. 323 * 324 * @throws IOException in case reading the request fails 325 */ 326 private String getRequestBody(ServletRequest request) throws IOException { 327 328 final StringBuilder sb = new StringBuilder(); 329 330 String line = request.getReader().readLine(); 331 while (null != line) { 332 sb.append(line); 333 line = request.getReader().readLine(); 334 } 335 336 return sb.toString(); 337 } 338 339 /** 340 * Parse parameters from this request using HTTP. 341 * 342 * @param req The ServletRequest containing all request parameters. 343 * @param cms The OpenCms object. 344 * @return CmsSpellcheckingRequest object that contains parsed parameters. 345 */ 346 private CmsSpellcheckingRequest parseHttpRequest(final ServletRequest req, final CmsObject cms) { 347 348 if ((null != cms) && OpenCms.getRoleManager().hasRole(cms, CmsRole.ROOT_ADMIN)) { 349 try { 350 if (null != req.getParameter(HTTP_PARAMETER_CHECKREBUILD)) { 351 if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(cms)) { 352 353 parseAndAddDictionaries(cms); 354 355 } 356 } 357 358 if (null != req.getParameter(HTTP_PARAMTER_REBUILD)) { 359 parseAndAddDictionaries(cms); 360 } 361 } catch (CmsRoleViolationException e) { 362 LOG.error(e.getLocalizedMessage(), e); 363 } 364 } 365 366 final String q = req.getParameter(HTTP_PARAMETER_WORDS); 367 368 if (null == q) { 369 LOG.debug("Invalid HTTP request: No parameter \"" + HTTP_PARAMETER_WORDS + "\" defined. "); 370 return null; 371 } 372 373 final StringTokenizer st = new StringTokenizer(q); 374 final List<String> wordsToCheck = new ArrayList<String>(); 375 while (st.hasMoreTokens()) { 376 final String word = st.nextToken(); 377 wordsToCheck.add(word); 378 379 if (Character.isUpperCase(word.codePointAt(0))) { 380 wordsToCheck.add(word.toLowerCase()); 381 } 382 } 383 384 final String[] w = wordsToCheck.toArray(new String[wordsToCheck.size()]); 385 final String dict = req.getParameter(HTTP_PARAMETER_LANG) == null 386 ? LANG_DEFAULT 387 : req.getParameter(HTTP_PARAMETER_LANG); 388 389 return new CmsSpellcheckingRequest(w, dict); 390 } 391 392 /** 393 * Parse JSON parameters from this request. 394 * 395 * @param jsonRequest The request in the JSON format. 396 * @return CmsSpellcheckingRequest object that contains parsed parameters or null, if JSON input is not well 397 * defined. 398 */ 399 private CmsSpellcheckingRequest parseJsonRequest(JSONObject jsonRequest) { 400 401 final String id = jsonRequest.optString(JSON_ID); 402 403 final JSONObject params = jsonRequest.optJSONObject(JSON_PARAMS); 404 405 if (null == params) { 406 LOG.debug("Invalid JSON request: No field \"params\" defined. "); 407 return null; 408 } 409 final JSONArray words = params.optJSONArray(JSON_WORDS); 410 final String lang = params.optString(JSON_LANG, LANG_DEFAULT); 411 if (null == words) { 412 LOG.debug("Invalid JSON request: No field \"words\" defined. "); 413 return null; 414 } 415 416 // Convert JSON array to array of type String 417 final List<String> wordsToCheck = new LinkedList<String>(); 418 for (int i = 0; i < words.length(); i++) { 419 final String word = words.opt(i).toString(); 420 wordsToCheck.add(word); 421 422 if (Character.isUpperCase(word.codePointAt(0))) { 423 wordsToCheck.add(word.toLowerCase()); 424 } 425 } 426 427 return new CmsSpellcheckingRequest(wordsToCheck.toArray(new String[wordsToCheck.size()]), lang, id); 428 } 429 430 /** 431 * Perform a security check against OpenCms. 432 * 433 * @param cms The OpenCms object. 434 * 435 * @throws CmsPermissionViolationException in case of the anonymous guest user 436 */ 437 private void performPermissionCheck(CmsObject cms) throws CmsPermissionViolationException { 438 439 if (cms.getRequestContext().getCurrentUser().isGuestUser()) { 440 throw new CmsPermissionViolationException(null); 441 } 442 } 443 444 /** 445 * Performs the actual spell check query using Solr. 446 * 447 * @param request the spell check request 448 * 449 * @return Results of the Solr spell check of type SpellCheckResponse or null if something goes wrong. 450 */ 451 private SpellCheckResponse performSpellcheckQuery(CmsSpellcheckingRequest request) { 452 453 if ((null == request) || !request.isInitialized()) { 454 return null; 455 } 456 457 final String[] wordsToCheck = request.m_wordsToCheck; 458 459 final ModifiableSolrParams params = new ModifiableSolrParams(); 460 params.set("spellcheck", "true"); 461 params.set("spellcheck.dictionary", request.m_dictionaryToUse); 462 params.set("spellcheck.extendedResults", "true"); 463 464 // Build one string from array of words and use it as query. 465 final StringBuilder builder = new StringBuilder(); 466 for (int i = 0; i < wordsToCheck.length; i++) { 467 builder.append(wordsToCheck[i] + " "); 468 } 469 470 params.set("spellcheck.q", builder.toString()); 471 472 final SolrQuery query = new SolrQuery(); 473 query.setRequestHandler("/spell"); 474 query.add(params); 475 476 try { 477 QueryResponse qres = m_solrClient.query(query); 478 return qres.getSpellCheckResponse(); 479 } catch (Exception e) { 480 LOG.debug("Exception while performing spellcheck query...", e); 481 } 482 483 return null; 484 } 485 486 /** 487 * Sends the JSON-formatted spellchecking results to the client. 488 * 489 * @param res The HttpServletResponse object. 490 * @param request The spellchecking request object. 491 * 492 * @throws IOException in case writing the response fails 493 */ 494 private void sendResponse(final HttpServletResponse res, final CmsSpellcheckingRequest request) throws IOException { 495 496 final PrintWriter pw = res.getWriter(); 497 final JSONObject response = getJsonFormattedSpellcheckResult(request); 498 pw.println(response.toString()); 499 pw.close(); 500 } 501 502 /** 503 * Sets the appropriate headers to response of this request. 504 * 505 * @param response The HttpServletResponse response object. 506 */ 507 private void setResponeHeaders(HttpServletResponse response) { 508 509 response.setHeader("Cache-Control", "no-store, no-cache"); 510 response.setHeader("Pragma", "no-cache"); 511 response.setDateHeader("Expires", System.currentTimeMillis()); 512 response.setContentType("text/plain; charset=utf-8"); 513 response.setCharacterEncoding("utf-8"); 514 } 515}