001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
030import org.opencms.file.CmsObject;
031import org.opencms.main.CmsException;
032import org.opencms.xml.CmsXmlException;
033
034import java.io.BufferedReader;
035import java.io.IOException;
036import java.io.StringReader;
037import java.io.StringWriter;
038import java.util.StringTokenizer;
039
040import javax.xml.transform.Source;
041import javax.xml.transform.Transformer;
042import javax.xml.transform.TransformerFactory;
043import javax.xml.transform.stream.StreamResult;
044import javax.xml.transform.stream.StreamSource;
045
046/**
047 * Provides utility functions for XSLT transformations.<p>
048 *
049 * TODO: This class is apparently customer specific and should probably be removed from the core!
050 *
051 * @since 6.2.1
052 */
053public final class CmsXsltUtil {
054
055    /** The delimiter to end a tag. */
056    public static final String TAG_END_DELIMITER = ">";
057
058    /** The delimiter to start a tag. */
059    public static final String TAG_START_DELIMITER = "<";
060
061    /** The delimiter to separate the text. */
062    public static final char TEXT_DELIMITER = '"';
063
064    /** the delimiters, the csv data can be separated with.*/
065    static final String[] DELIMITERS = {";", ",", "\t"};
066
067    /**
068     * Hides the public constructor.<p>
069     */
070    private CmsXsltUtil() {
071
072        // noop
073    }
074
075    /**
076     * Returns the delimiter that most often occures in the CSV content and is therefore best applicable for the CSV data .<p>
077     *
078     * @param csvData the comma separated values
079     *
080     * @return the delimiter that is best applicable for the CSV data
081     */
082    public static String getPreferredDelimiter(String csvData) {
083
084        String bestMatch = "";
085        int bestMatchCount = 0;
086        // find for each delimiter, how often it occures in the String csvData
087        for (int i = 0; i < DELIMITERS.length; i++) {
088            int currentCount = csvData.split(DELIMITERS[i]).length;
089            if (currentCount > bestMatchCount) {
090                bestMatch = DELIMITERS[i];
091                bestMatchCount = currentCount;
092            }
093        }
094        return bestMatch;
095    }
096
097    /**
098     * Changes content from CSV to xml/html.<p>
099     *
100     * The method does not use DOM4J, because iso-8859-1 code ist not transformed correctly.
101     *
102     * @param cms the cms object
103     * @param xsltFile the XSLT transformation file
104     * @param csvContent the csv content to transform
105     * @param delimiter delimiter used to separate csv fields
106     *
107     * @return the transformed xml
108     *
109     * @throws CmsXmlException if something goes wrong
110     * @throws CmsException if something goes wrong
111     */
112    public static String transformCsvContent(CmsObject cms, String xsltFile, String csvContent, String delimiter)
113    throws CmsException, CmsXmlException {
114
115        String xmlContent = "";
116        try {
117            xmlContent = getTableHtml(csvContent, delimiter);
118        } catch (IOException e) {
119            throw new CmsXmlException(Messages.get().container(Messages.ERR_CSV_XML_TRANSFORMATION_FAILED_0));
120        }
121
122        // if xslt file parameter is set, transform the raw html and set the css stylesheet property
123        // of the converted file to that of the stylesheet
124        if (xsltFile != null) {
125            xmlContent = transformXmlContent(cms, xsltFile, xmlContent);
126        }
127
128        return xmlContent;
129    }
130
131    /**
132     * Applies a XSLT Transformation to the content.<p>
133     *
134     * The method does not use DOM4J, because iso-8859-1 code ist not transformed correctly.
135     *
136     * @param cms the cms object
137     * @param xsltFile the XSLT transformation file
138     * @param xmlContent the XML content to transform
139     *
140     * @return the transformed xml
141     *
142     * @throws CmsXmlException if something goes wrong
143     * @throws CmsException if something goes wrong
144     */
145    public static String transformXmlContent(CmsObject cms, String xsltFile, String xmlContent)
146    throws CmsException, CmsXmlException {
147
148        // JAXP reads data
149        Source xmlSource = new StreamSource(new StringReader(xmlContent));
150        String xsltString = new String(cms.readFile(xsltFile).getContents());
151        Source xsltSource = new StreamSource(new StringReader(xsltString));
152        String result = null;
153
154        try {
155            TransformerFactory transFact = TransformerFactory.newInstance();
156            Transformer trans = transFact.newTransformer(xsltSource);
157
158            StringWriter writer = new StringWriter();
159            trans.transform(xmlSource, new StreamResult(writer));
160            result = writer.toString();
161        } catch (Exception exc) {
162            throw new CmsXmlException(Messages.get().container(Messages.ERR_CSV_XML_TRANSFORMATION_FAILED_0));
163        }
164
165        // cut of the prefacing declaration '<?xml version="1.0" encoding="UTF-8"?>'
166        if (result.startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")) {
167            return result.substring(38);
168        } else {
169            return result;
170        }
171    }
172
173    /**
174     * Converts a delimiter separated format string int o colgroup html fragment.<p>
175     *
176     * @param formatString the formatstring to convert
177     * @param delimiter the delimiter the formats (l,r or c) are delimited with
178     *
179     * @return the resulting colgroup HTML
180     */
181    private static String getColGroup(String formatString, String delimiter) {
182
183        StringBuffer colgroup = new StringBuffer(128);
184        String[] formatStrings = formatString.split(delimiter);
185        colgroup.append("<colgroup>");
186        for (int i = 0; i < formatStrings.length; i++) {
187            colgroup.append("<col align=\"");
188            char align = formatStrings[i].trim().charAt(0);
189            switch (align) {
190                case 'l':
191                    colgroup.append("left");
192                    break;
193                case 'c':
194                    colgroup.append("center");
195                    break;
196                case 'r':
197                    colgroup.append("right");
198                    break;
199                default:
200                    throw new RuntimeException("invalid format option");
201            }
202            colgroup.append("\"/>");
203        }
204        return colgroup.append("</colgroup>").toString();
205    }
206
207    /**
208     * Converts CSV data to XML.<p>
209     *
210     * @return a XML representation of the CSV data
211     *
212     * @param csvData the CSV data to convert
213     * @param delimiter the delimiter to separate the values with
214     *
215     * @throws IOException if there is an IO problem
216     */
217    private static String getTableHtml(String csvData, String delimiter) throws IOException {
218
219        String lineSeparator = System.getProperty("line.separator");
220        int tmpindex = csvData.indexOf(lineSeparator);
221        String formatString = (tmpindex >= 0) ? csvData.substring(0, tmpindex) : csvData;
222
223        if (delimiter == null) {
224            delimiter = getPreferredDelimiter(csvData);
225        }
226
227        StringBuffer xml = new StringBuffer("<table>");
228        if (isFormattingInformation(formatString, delimiter)) {
229            // transform formatting to HTML colgroup
230            xml.append(getColGroup(formatString, delimiter));
231            // cut of first line
232            csvData = csvData.substring(formatString.length() + lineSeparator.length());
233        }
234
235        String line;
236        BufferedReader br = new BufferedReader(new StringReader(csvData));
237        while ((line = br.readLine()) != null) {
238            xml.append("<tr>\n");
239
240            // must use tokenizer with delimiters include in order to handle empty cells appropriately
241            StringTokenizer t = new StringTokenizer(line, delimiter, true);
242            boolean hasValue = false;
243            while (t.hasMoreElements()) {
244                String item = (String)t.nextElement();
245                if (!hasValue) {
246                    xml.append("\t<td>");
247                    hasValue = true;
248                }
249                if (!item.equals(delimiter)) {
250
251                    // remove enclosing delimiters
252                    item = removeStringDelimiters(item);
253
254                    // in order to allow links, lines starting and ending with tag delimiters (< ...>) remains unescaped
255                    if (item.startsWith(TAG_START_DELIMITER) && item.endsWith(TAG_END_DELIMITER)) {
256                        xml.append(item);
257                    } else {
258                        xml.append(CmsStringUtil.escapeHtml(item));
259                    }
260                } else {
261                    xml.append("</td>\n");
262                    hasValue = false;
263                }
264            }
265            if (hasValue) {
266                xml.append("</td>\n");
267            } else {
268                xml.append("<td></td>\n");
269            }
270
271            xml.append("</tr>\n");
272        }
273
274        return xml.append("</table>").toString();
275    }
276
277    /**
278     * Tests if the given string is a <code>delimiter</code> separated list of formatting information.<p>
279     *
280     * @param formatString the string to check
281     * @param delimiter the list separators
282     *
283     * @return true if the string is a <code>delimiter</code> separated list of Formatting Information
284     */
285    private static boolean isFormattingInformation(String formatString, String delimiter) {
286
287        String[] formatStrings = formatString.split(delimiter);
288        for (int i = 0; i < formatStrings.length; i++) {
289            if (!formatStrings[i].trim().matches("[lcr]")) {
290                return false;
291            }
292        }
293        return true;
294    }
295
296    /**
297     * Removes the string delimiters from a key (as well as any white space
298     * outside the delimiters).<p>
299     *
300     * @param key the key (including delimiters)
301     *
302     * @return the key without delimiters
303     */
304    private static String removeStringDelimiters(String key) {
305
306        String k = key.trim();
307        if (CmsStringUtil.isNotEmpty(k)) {
308            if (k.charAt(0) == TEXT_DELIMITER) {
309                k = k.substring(1);
310            }
311            if (k.charAt(k.length() - 1) == TEXT_DELIMITER) {
312                k = k.substring(0, k.length() - 1);
313            }
314        }
315        // replace excel protected quotations marks ("") by single quotation marks
316        k = CmsStringUtil.substitute(k, "\"\"", "\"");
317        return k;
318    }
319}