001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 * 027 * This file is based on: 028 * org.json.XMLTokener 029 * from the JSON in Java implementation. 030 * 031 * Copyright (c) 2002 JSON.org 032 * 033 * Permission is hereby granted, free of charge, to any person obtaining a copy 034 * of this software and associated documentation files (the "Software"), to deal 035 * in the Software without restriction, including without limitation the rights 036 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 037 * copies of the Software, and to permit persons to whom the Software is 038 * furnished to do so, subject to the following conditions: 039 * 040 * The above copyright notice and this permission notice shall be included in all 041 * copies or substantial portions of the Software. 042 * 043 * The Software shall be used for Good, not Evil. 044 * 045 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 046 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 047 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 048 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 049 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 050 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 051 * SOFTWARE. 052 */ 053 054package org.opencms.json; 055 056/** 057 * The XMLTokener extends the JSONTokener to provide additional methods 058 * for the parsing of XML texts.<p> 059 * 060 */ 061public class XMLTokener extends JSONTokener { 062 063 /** The table of ENTITY values. It initially contains Character values for 064 * amp, apos, gt, lt, quot. 065 */ 066 public static final java.util.HashMap<String, Character> ENTITY; 067 068 /** 069 * Construct an XMLTokener from a string.<p> 070 * 071 * @param s a source string 072 */ 073 public XMLTokener(String s) { 074 075 super(s); 076 } 077 078 static { 079 ENTITY = new java.util.HashMap<String, Character>(8); 080 ENTITY.put("amp", XML.AMP); 081 ENTITY.put("apos", XML.APOS); 082 ENTITY.put("gt", XML.GT); 083 ENTITY.put("lt", XML.LT); 084 ENTITY.put("quot", XML.QUOT); 085 } 086 087 /** 088 * Get the text in the CDATA block.<p> 089 * 090 * @return the string up to the <code>]]></code> 091 * @throws JSONException if the <code>]]></code> is not found 092 */ 093 public String nextCDATA() throws JSONException { 094 095 char c; 096 int i; 097 StringBuffer sb = new StringBuffer(); 098 for (;;) { 099 c = next(); 100 if (c == 0) { 101 throw syntaxError("Unclosed CDATA"); 102 } 103 sb.append(c); 104 i = sb.length() - 3; 105 if ((i >= 0) && (sb.charAt(i) == ']') && (sb.charAt(i + 1) == ']') && (sb.charAt(i + 2) == '>')) { 106 sb.setLength(i); 107 return sb.toString(); 108 } 109 } 110 } 111 112 /** 113 * Get the next XML outer token, trimming whitespace.<p> 114 * 115 * There are two kinds of tokens: the '<' character which begins a markup tag, and the content 116 * text between markup tags.<p> 117 * 118 * @return a string, or a '<' Character, or null if there is no more source text 119 * @throws JSONException if something goes wrong 120 */ 121 public Object nextContent() throws JSONException { 122 123 char c; 124 StringBuffer sb; 125 do { 126 c = next(); 127 } while (Character.isWhitespace(c)); 128 if (c == 0) { 129 return null; 130 } 131 if (c == '<') { 132 return XML.LT; 133 } 134 sb = new StringBuffer(); 135 for (;;) { 136 if ((c == '<') || (c == 0)) { 137 back(); 138 return sb.toString().trim(); 139 } 140 if (c == '&') { 141 sb.append(nextEntity(c)); 142 } else { 143 sb.append(c); 144 } 145 c = next(); 146 } 147 } 148 149 /** 150 * Return the next ENTITY. These entities are translated to Characters: 151 * <code>& ' > < "</code>.<p> 152 * 153 * @param a an ampersand character 154 * @return a Character or an entity String if the entity is not recognized 155 * @throws JSONException if missing ';' in XML entity 156 */ 157 public Object nextEntity(char a) throws JSONException { 158 159 StringBuffer sb = new StringBuffer(); 160 for (;;) { 161 char c = next(); 162 if (Character.isLetterOrDigit(c) || (c == '#')) { 163 sb.append(Character.toLowerCase(c)); 164 } else if (c == ';') { 165 break; 166 } else { 167 throw syntaxError("Missing ';' in XML ENTITY: &" + sb); 168 } 169 } 170 String s = sb.toString(); 171 Object e = ENTITY.get(s); 172 return e != null ? e : a + s + ";"; 173 } 174 175 /** 176 * Returns the next XML meta token. This is used for skipping over <!...> 177 * and <?...?> structures.<p> 178 * 179 * @return syntax characters (<code>< > / = ! ?</code>) are returned as 180 * Character, and strings and names are returned as Boolean. We don't care 181 * what the values actually are 182 * @throws JSONException if a string is not properly closed or if the XML 183 * is badly structured 184 */ 185 public Object nextMeta() throws JSONException { 186 187 char c; 188 char q; 189 do { 190 c = next(); 191 } while (Character.isWhitespace(c)); 192 switch (c) { 193 case 0: 194 throw syntaxError("Misshaped meta tag"); 195 case '<': 196 return XML.LT; 197 case '>': 198 return XML.GT; 199 case '/': 200 return XML.SLASH; 201 case '=': 202 return XML.EQ; 203 case '!': 204 return XML.BANG; 205 case '?': 206 return XML.QUEST; 207 case '"': 208 case '\'': 209 q = c; 210 for (;;) { 211 c = next(); 212 if (c == 0) { 213 throw syntaxError("Unterminated string"); 214 } 215 if (c == q) { 216 return Boolean.TRUE; 217 } 218 } 219 default: 220 for (;;) { 221 c = next(); 222 if (Character.isWhitespace(c)) { 223 return Boolean.TRUE; 224 } 225 switch (c) { 226 case 0: 227 case '<': 228 case '>': 229 case '/': 230 case '=': 231 case '!': 232 case '?': 233 case '"': 234 case '\'': 235 back(); 236 return Boolean.TRUE; 237 default: 238 } 239 } 240 } 241 } 242 243 /** 244 * Get the next XML Token.<p> 245 * 246 * These tokens are found inside of angle 247 * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it 248 * may be a string wrapped in single quotes or double quotes, or it may be a 249 * name.<p> 250 * 251 * @return a string or a Character 252 * @throws JSONException if the XML is not well formed 253 */ 254 public Object nextToken() throws JSONException { 255 256 char c; 257 char q; 258 StringBuffer sb; 259 do { 260 c = next(); 261 } while (Character.isWhitespace(c)); 262 switch (c) { 263 case 0: 264 throw syntaxError("Misshaped element"); 265 case '<': 266 throw syntaxError("Misplaced '<'"); 267 case '>': 268 return XML.GT; 269 case '/': 270 return XML.SLASH; 271 case '=': 272 return XML.EQ; 273 case '!': 274 return XML.BANG; 275 case '?': 276 return XML.QUEST; 277 278 // Quoted string 279 280 case '"': 281 case '\'': 282 q = c; 283 sb = new StringBuffer(); 284 for (;;) { 285 c = next(); 286 if (c == 0) { 287 throw syntaxError("Unterminated string"); 288 } 289 if (c == q) { 290 return sb.toString(); 291 } 292 if (c == '&') { 293 sb.append(nextEntity(c)); 294 } else { 295 sb.append(c); 296 } 297 } 298 default: 299 300 // Name 301 302 sb = new StringBuffer(); 303 for (;;) { 304 sb.append(c); 305 c = next(); 306 if (Character.isWhitespace(c)) { 307 return sb.toString(); 308 } 309 switch (c) { 310 case 0: 311 return sb.toString(); 312 case '>': 313 case '/': 314 case '=': 315 case '!': 316 case '?': 317 case '[': 318 case ']': 319 back(); 320 return sb.toString(); 321 case '<': 322 case '"': 323 case '\'': 324 throw syntaxError("Bad character in a name"); 325 default: 326 } 327 } 328 } 329 } 330 331 /** 332 * Skip characters until past the requested string.<p> 333 * 334 * If it is not found, we are left at the end of the source with a result of false.<p> 335 * 336 * @param to a string to skip past 337 * @return the truth 338 * @throws JSONException if something goes wrong 339 */ 340 public boolean skipPast(String to) throws JSONException { 341 342 boolean b; 343 char c; 344 int i; 345 int j; 346 int offset = 0; 347 int n = to.length(); 348 char[] circle = new char[n]; 349 350 /* 351 * First fill the circle buffer with as many characters as are in the 352 * to string. If we reach an early end, bail. 353 */ 354 355 for (i = 0; i < n; i += 1) { 356 c = next(); 357 if (c == 0) { 358 return false; 359 } 360 circle[i] = c; 361 } 362 /* 363 * We will loop, possibly for all of the remaining characters. 364 */ 365 for (;;) { 366 j = offset; 367 b = true; 368 /* 369 * Compare the circle buffer with the to string. 370 */ 371 for (i = 0; i < n; i += 1) { 372 if (circle[j] != to.charAt(i)) { 373 b = false; 374 break; 375 } 376 j += 1; 377 if (j >= n) { 378 j -= n; 379 } 380 } 381 /* 382 * If we exit the loop with b intact, then victory is ours. 383 */ 384 if (b) { 385 return true; 386 } 387 /* 388 * Get the next character. If there isn't one, then defeat is ours. 389 */ 390 c = next(); 391 if (c == 0) { 392 return false; 393 } 394 /* 395 * Shove the character in the circle buffer and advance the 396 * circle offset. The offset is mod n. 397 */ 398 circle[offset] = c; 399 offset += 1; 400 if (offset >= n) { 401 offset -= n; 402 } 403 } 404 } 405}