001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 *
027 * This file is based on:
028 * org.json.XMLTokener
029 * from the JSON in Java implementation.
030 *
031 * Copyright (c) 2002 JSON.org
032 *
033 * Permission is hereby granted, free of charge, to any person obtaining a copy
034 * of this software and associated documentation files (the "Software"), to deal
035 * in the Software without restriction, including without limitation the rights
036 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
037 * copies of the Software, and to permit persons to whom the Software is
038 * furnished to do so, subject to the following conditions:
039 *
040 * The above copyright notice and this permission notice shall be included in all
041 * copies or substantial portions of the Software.
042 *
043 * The Software shall be used for Good, not Evil.
044 *
045 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
046 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
047 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
048 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
049 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
050 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
051 * SOFTWARE.
052 */
053
054package org.opencms.json;
055
056/**
057 * The XMLTokener extends the JSONTokener to provide additional methods
058 * for the parsing of XML texts.<p>
059 *
060 */
061public class XMLTokener extends JSONTokener {
062
063    /** The table of ENTITY values. It initially contains Character values for
064     * amp, apos, gt, lt, quot.
065     */
066    public static final java.util.HashMap<String, Character> ENTITY;
067
068    /**
069     * Construct an XMLTokener from a string.<p>
070     *
071     * @param s a source string
072     */
073    public XMLTokener(String s) {
074
075        super(s);
076    }
077
078    static {
079        ENTITY = new java.util.HashMap<String, Character>(8);
080        ENTITY.put("amp", XML.AMP);
081        ENTITY.put("apos", XML.APOS);
082        ENTITY.put("gt", XML.GT);
083        ENTITY.put("lt", XML.LT);
084        ENTITY.put("quot", XML.QUOT);
085    }
086
087    /**
088     * Get the text in the CDATA block.<p>
089     *
090     * @return the string up to the <code>]]&gt;</code>
091     * @throws JSONException if the <code>]]&gt;</code> is not found
092     */
093    public String nextCDATA() throws JSONException {
094
095        char c;
096        int i;
097        StringBuffer sb = new StringBuffer();
098        for (;;) {
099            c = next();
100            if (c == 0) {
101                throw syntaxError("Unclosed CDATA");
102            }
103            sb.append(c);
104            i = sb.length() - 3;
105            if ((i >= 0) && (sb.charAt(i) == ']') && (sb.charAt(i + 1) == ']') && (sb.charAt(i + 2) == '>')) {
106                sb.setLength(i);
107                return sb.toString();
108            }
109        }
110    }
111
112    /**
113     * Get the next XML outer token, trimming whitespace.<p>
114     *
115     * There are two kinds of tokens: the '<' character which begins a markup tag, and the content
116     * text between markup tags.<p>
117     *
118     * @return  a string, or a '<' Character, or null if there is no more source text
119     * @throws JSONException if something goes wrong
120     */
121    public Object nextContent() throws JSONException {
122
123        char c;
124        StringBuffer sb;
125        do {
126            c = next();
127        } while (Character.isWhitespace(c));
128        if (c == 0) {
129            return null;
130        }
131        if (c == '<') {
132            return XML.LT;
133        }
134        sb = new StringBuffer();
135        for (;;) {
136            if ((c == '<') || (c == 0)) {
137                back();
138                return sb.toString().trim();
139            }
140            if (c == '&') {
141                sb.append(nextEntity(c));
142            } else {
143                sb.append(c);
144            }
145            c = next();
146        }
147    }
148
149    /**
150     * Return the next ENTITY. These entities are translated to Characters:
151     *     <code>&amp;  &apos;  &gt;  &lt;  &quot;</code>.<p>
152     *
153     * @param a an ampersand character
154     * @return  a Character or an entity String if the entity is not recognized
155     * @throws JSONException if missing ';' in XML entity
156     */
157    public Object nextEntity(char a) throws JSONException {
158
159        StringBuffer sb = new StringBuffer();
160        for (;;) {
161            char c = next();
162            if (Character.isLetterOrDigit(c) || (c == '#')) {
163                sb.append(Character.toLowerCase(c));
164            } else if (c == ';') {
165                break;
166            } else {
167                throw syntaxError("Missing ';' in XML ENTITY: &" + sb);
168            }
169        }
170        String s = sb.toString();
171        Object e = ENTITY.get(s);
172        return e != null ? e : a + s + ";";
173    }
174
175    /**
176     * Returns the next XML meta token. This is used for skipping over <!...>
177     * and <?...?> structures.<p>
178     *
179     * @return syntax characters (<code>< > / = ! ?</code>) are returned as
180     *  Character, and strings and names are returned as Boolean. We don't care
181     *  what the values actually are
182     * @throws JSONException if a string is not properly closed or if the XML
183     *  is badly structured
184     */
185    public Object nextMeta() throws JSONException {
186
187        char c;
188        char q;
189        do {
190            c = next();
191        } while (Character.isWhitespace(c));
192        switch (c) {
193            case 0:
194                throw syntaxError("Misshaped meta tag");
195            case '<':
196                return XML.LT;
197            case '>':
198                return XML.GT;
199            case '/':
200                return XML.SLASH;
201            case '=':
202                return XML.EQ;
203            case '!':
204                return XML.BANG;
205            case '?':
206                return XML.QUEST;
207            case '"':
208            case '\'':
209                q = c;
210                for (;;) {
211                    c = next();
212                    if (c == 0) {
213                        throw syntaxError("Unterminated string");
214                    }
215                    if (c == q) {
216                        return Boolean.TRUE;
217                    }
218                }
219            default:
220                for (;;) {
221                    c = next();
222                    if (Character.isWhitespace(c)) {
223                        return Boolean.TRUE;
224                    }
225                    switch (c) {
226                        case 0:
227                        case '<':
228                        case '>':
229                        case '/':
230                        case '=':
231                        case '!':
232                        case '?':
233                        case '"':
234                        case '\'':
235                            back();
236                            return Boolean.TRUE;
237                        default:
238                    }
239                }
240        }
241    }
242
243    /**
244     * Get the next XML Token.<p>
245     *
246     * These tokens are found inside of angle
247     * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it
248     * may be a string wrapped in single quotes or double quotes, or it may be a
249     * name.<p>
250     *
251     * @return a string or a Character
252     * @throws JSONException if the XML is not well formed
253     */
254    public Object nextToken() throws JSONException {
255
256        char c;
257        char q;
258        StringBuffer sb;
259        do {
260            c = next();
261        } while (Character.isWhitespace(c));
262        switch (c) {
263            case 0:
264                throw syntaxError("Misshaped element");
265            case '<':
266                throw syntaxError("Misplaced '<'");
267            case '>':
268                return XML.GT;
269            case '/':
270                return XML.SLASH;
271            case '=':
272                return XML.EQ;
273            case '!':
274                return XML.BANG;
275            case '?':
276                return XML.QUEST;
277
278            // Quoted string
279
280            case '"':
281            case '\'':
282                q = c;
283                sb = new StringBuffer();
284                for (;;) {
285                    c = next();
286                    if (c == 0) {
287                        throw syntaxError("Unterminated string");
288                    }
289                    if (c == q) {
290                        return sb.toString();
291                    }
292                    if (c == '&') {
293                        sb.append(nextEntity(c));
294                    } else {
295                        sb.append(c);
296                    }
297                }
298            default:
299
300                // Name
301
302                sb = new StringBuffer();
303                for (;;) {
304                    sb.append(c);
305                    c = next();
306                    if (Character.isWhitespace(c)) {
307                        return sb.toString();
308                    }
309                    switch (c) {
310                        case 0:
311                            return sb.toString();
312                        case '>':
313                        case '/':
314                        case '=':
315                        case '!':
316                        case '?':
317                        case '[':
318                        case ']':
319                            back();
320                            return sb.toString();
321                        case '<':
322                        case '"':
323                        case '\'':
324                            throw syntaxError("Bad character in a name");
325                        default:
326                    }
327                }
328        }
329    }
330
331    /**
332     * Skip characters until past the requested string.<p>
333     *
334     * If it is not found, we are left at the end of the source with a result of false.<p>
335     *
336     * @param to a string to skip past
337     * @return the truth
338     * @throws JSONException if something goes wrong
339     */
340    public boolean skipPast(String to) throws JSONException {
341
342        boolean b;
343        char c;
344        int i;
345        int j;
346        int offset = 0;
347        int n = to.length();
348        char[] circle = new char[n];
349
350        /*
351         * First fill the circle buffer with as many characters as are in the
352         * to string. If we reach an early end, bail.
353         */
354
355        for (i = 0; i < n; i += 1) {
356            c = next();
357            if (c == 0) {
358                return false;
359            }
360            circle[i] = c;
361        }
362        /*
363         * We will loop, possibly for all of the remaining characters.
364         */
365        for (;;) {
366            j = offset;
367            b = true;
368            /*
369             * Compare the circle buffer with the to string.
370             */
371            for (i = 0; i < n; i += 1) {
372                if (circle[j] != to.charAt(i)) {
373                    b = false;
374                    break;
375                }
376                j += 1;
377                if (j >= n) {
378                    j -= n;
379                }
380            }
381            /*
382             * If we exit the loop with b intact, then victory is ours.
383             */
384            if (b) {
385                return true;
386            }
387            /*
388             * Get the next character. If there isn't one, then defeat is ours.
389             */
390            c = next();
391            if (c == 0) {
392                return false;
393            }
394            /*
395             * Shove the character in the circle buffer and advance the
396             * circle offset. The offset is mod n.
397             */
398            circle[offset] = c;
399            offset += 1;
400            if (offset >= n) {
401                offset -= n;
402            }
403        }
404    }
405}