/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.util;

import org.opencms.file.CmsObject;
import org.opencms.main.CmsException;
import org.opencms.xml.CmsXmlException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

/**
 * Provides utility functions for XSLT transformations.<p>
 *
 * TODO: This class is apparently customer specific and should probably be removed from the core!
 *
 * @since 6.2.1
 */
public final class CmsXsltUtil {

    /** The delimiter to end a tag. */
    public static final String TAG_END_DELIMITER = ">";

    /** The delimiter to start a tag. */
    public static final String TAG_START_DELIMITER = "<";

    /** The delimiter to separate the text. */
    public static final char TEXT_DELIMITER = '"';

    /** The delimiters the CSV data can be separated with. */
    static final String[] DELIMITERS = {";", ",", "\t"};

    /** The XML declaration that is cut off from the start of a transformation result. */
    private static final String XML_DECLARATION_UTF8 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

    /**
     * Hides the public constructor.<p>
     */
    private CmsXsltUtil() {

        // noop
    }

    /**
     * Returns the delimiter that occurs most often in the CSV content and is therefore
     * best applicable for the CSV data.<p>
     *
     * If several delimiters occur equally often, the one listed first in {@link #DELIMITERS} wins.
     * If no delimiter occurs at all (or the data is <code>null</code>), the first entry of
     * {@link #DELIMITERS} is returned.<p>
     *
     * @param csvData the comma separated values
     *
     * @return the delimiter that is best applicable for the CSV data
     */
    public static String getPreferredDelimiter(String csvData) {

        String bestMatch = DELIMITERS[0];
        if (csvData == null) {
            return bestMatch;
        }
        int bestMatchCount = countOccurrences(csvData, bestMatch);
        // find for each remaining delimiter how often it occurs in the CSV data
        for (int i = 1; i < DELIMITERS.length; i++) {
            int currentCount = countOccurrences(csvData, DELIMITERS[i]);
            // strict comparison keeps the earlier delimiter on a tie
            if (currentCount > bestMatchCount) {
                bestMatch = DELIMITERS[i];
                bestMatchCount = currentCount;
            }
        }
        return bestMatch;
    }

    /**
     * Changes content from CSV to xml/html.<p>
     *
     * The method does not use DOM4J, because iso-8859-1 code is not transformed correctly.
     *
     * @param cms the cms object
     * @param xsltFile the XSLT transformation file, or <code>null</code> to return the raw table HTML
     * @param csvContent the csv content to transform
     * @param delimiter delimiter used to separate csv fields, or <code>null</code> to detect it
     *
     * @return the transformed xml
     *
     * @throws CmsXmlException if something goes wrong
     * @throws CmsException if something goes wrong
     */
    public static String transformCsvContent(CmsObject cms, String xsltFile, String csvContent, String delimiter)
    throws CmsException, CmsXmlException {

        String xmlContent = "";
        try {
            xmlContent = getTableHtml(csvContent, delimiter);
        } catch (IOException e) {
            // keep the original exception as cause so the root problem is not lost
            throw new CmsXmlException(Messages.get().container(Messages.ERR_CSV_XML_TRANSFORMATION_FAILED_0), e);
        }

        // if the xslt file parameter is set, transform the raw html with that stylesheet
        if (xsltFile != null) {
            xmlContent = transformXmlContent(cms, xsltFile, xmlContent);
        }

        return xmlContent;
    }

    /**
     * Applies a XSLT Transformation to the content.<p>
     *
     * The method does not use DOM4J, because iso-8859-1 code is not transformed correctly.
     *
     * @param cms the cms object
     * @param xsltFile the XSLT transformation file
     * @param xmlContent the XML content to transform
     *
     * @return the transformed xml
     *
     * @throws CmsXmlException if something goes wrong
     * @throws CmsException if something goes wrong
     */
    public static String transformXmlContent(CmsObject cms, String xsltFile, String xmlContent)
    throws CmsException, CmsXmlException {

        // JAXP reads data
        Source xmlSource = new StreamSource(new StringReader(xmlContent));
        // NOTE(review): the stylesheet bytes are decoded with the platform default charset;
        // confirm that this matches the encoding the XSLT files are stored with
        String xsltString = new String(cms.readFile(xsltFile).getContents());
        Source xsltSource = new StreamSource(new StringReader(xsltString));
        String result = null;

        try {
            TransformerFactory transFact = TransformerFactory.newInstance();
            Transformer trans = transFact.newTransformer(xsltSource);

            StringWriter writer = new StringWriter();
            trans.transform(xmlSource, new StreamResult(writer));
            result = writer.toString();
        } catch (Exception exc) {
            // keep the original exception as cause so the root problem is not lost
            throw new CmsXmlException(Messages.get().container(Messages.ERR_CSV_XML_TRANSFORMATION_FAILED_0), exc);
        }

        // cut off the prefacing declaration '<?xml version="1.0" encoding="UTF-8"?>'
        if (result.startsWith(XML_DECLARATION_UTF8)) {
            return result.substring(XML_DECLARATION_UTF8.length());
        }
        return result;
    }

    /**
     * Appends one CSV line as a HTML table row to the given buffer.<p>
     *
     * @param xml the buffer to append the row to
     * @param line the CSV line to convert
     * @param delimiter the delimiter the values are separated with
     */
    private static void appendTableRow(StringBuilder xml, String line, String delimiter) {

        xml.append("<tr>\n");

        // must use tokenizer with delimiters included in order to handle empty cells appropriately
        StringTokenizer tokenizer = new StringTokenizer(line, delimiter, true);
        boolean cellOpen = false;
        while (tokenizer.hasMoreTokens()) {
            String item = tokenizer.nextToken();
            if (!cellOpen) {
                xml.append("\t<td>");
                cellOpen = true;
            }
            if (item.equals(delimiter)) {
                // a delimiter closes the current cell (which may be empty)
                xml.append("</td>\n");
                cellOpen = false;
            } else {
                // remove enclosing delimiters
                item = removeStringDelimiters(item);

                // in order to allow links, values starting and ending with tag delimiters (< ...>) remain unescaped
                if (item.startsWith(TAG_START_DELIMITER) && item.endsWith(TAG_END_DELIMITER)) {
                    xml.append(item);
                } else {
                    xml.append(CmsStringUtil.escapeHtml(item));
                }
            }
        }
        if (cellOpen) {
            xml.append("</td>\n");
        } else {
            // the line was empty or ended with a delimiter: add the trailing empty cell
            xml.append("<td></td>\n");
        }

        xml.append("</tr>\n");
    }

    /**
     * Counts the non-overlapping occurrences of a token in a text.<p>
     *
     * @param text the text to search in
     * @param token the (non-empty) token to count
     *
     * @return the number of occurrences of the token in the text
     */
    private static int countOccurrences(String text, String token) {

        int count = 0;
        int pos = text.indexOf(token);
        while (pos >= 0) {
            count++;
            pos = text.indexOf(token, pos + token.length());
        }
        return count;
    }

    /**
     * Converts a delimiter separated format string into a colgroup html fragment.<p>
     *
     * @param formatString the format string to convert
     * @param delimiter the delimiter the formats (l, r or c) are delimited with
     *
     * @return the resulting colgroup HTML
     */
    private static String getColGroup(String formatString, String delimiter) {

        StringBuilder colgroup = new StringBuilder(128);
        // the delimiter is a literal, not a regular expression
        String[] formatStrings = formatString.split(Pattern.quote(delimiter));
        colgroup.append("<colgroup>");
        for (int i = 0; i < formatStrings.length; i++) {
            colgroup.append("<col align=\"");
            char align = formatStrings[i].trim().charAt(0);
            switch (align) {
                case 'l':
                    colgroup.append("left");
                    break;
                case 'c':
                    colgroup.append("center");
                    break;
                case 'r':
                    colgroup.append("right");
                    break;
                default:
                    throw new RuntimeException("invalid format option");
            }
            colgroup.append("\"/>");
        }
        return colgroup.append("</colgroup>").toString();
    }

    /**
     * Converts CSV data to XML.<p>
     *
     * If the first line of the data is a list of formatting information (l, c or r),
     * it is converted to a colgroup and not rendered as a table row.<p>
     *
     * @return a XML representation of the CSV data
     *
     * @param csvData the CSV data to convert
     * @param delimiter the delimiter to separate the values with, or <code>null</code> to detect it
     *
     * @throws IOException if there is an IO problem
     */
    private static String getTableHtml(String csvData, String delimiter) throws IOException {

        if (delimiter == null) {
            delimiter = getPreferredDelimiter(csvData);
        }

        StringBuilder xml = new StringBuilder("<table>");

        // readLine() accepts \n, \r and \r\n, so the result does not depend on the platform line separator
        BufferedReader br = new BufferedReader(new StringReader(csvData));
        String line = br.readLine();
        if ((line != null) && isFormattingInformation(line, delimiter)) {
            // transform formatting to HTML colgroup and skip the first line
            xml.append(getColGroup(line, delimiter));
            line = br.readLine();
        }

        while (line != null) {
            appendTableRow(xml, line, delimiter);
            line = br.readLine();
        }

        return xml.append("</table>").toString();
    }

    /**
     * Tests if the given string is a <code>delimiter</code> separated list of formatting information.<p>
     *
     * @param formatString the string to check
     * @param delimiter the list separators
     *
     * @return true if the string is a non-empty <code>delimiter</code> separated list of formatting information
     */
    private static boolean isFormattingInformation(String formatString, String delimiter) {

        // the delimiter is a literal, not a regular expression
        String[] formatStrings = formatString.split(Pattern.quote(delimiter));
        if (formatStrings.length == 0) {
            // a line made up of delimiters only carries no formatting information
            return false;
        }
        for (int i = 0; i < formatStrings.length; i++) {
            String format = formatStrings[i].trim();
            // each entry must be exactly one of 'l', 'c' or 'r'
            if ((format.length() != 1) || ("lcr".indexOf(format.charAt(0)) < 0)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes the string delimiters from a key (as well as any white space
     * outside the delimiters).<p>
     *
     * @param key the key (including delimiters)
     *
     * @return the key without delimiters
     */
    private static String removeStringDelimiters(String key) {

        String k = key.trim();
        if ((k.length() > 0) && (k.charAt(0) == TEXT_DELIMITER)) {
            k = k.substring(1);
        }
        // check the length again: a key made up of a single delimiter is empty by now
        if ((k.length() > 0) && (k.charAt(k.length() - 1) == TEXT_DELIMITER)) {
            k = k.substring(0, k.length() - 1);
        }
        // replace excel protected quotation marks ("") by single quotation marks
        return CmsStringUtil.substitute(k, "\"\"", "\"");
    }
}