001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
030import java.net.URI;
031import java.net.URISyntaxException;
032
033import org.apache.http.client.utils.URIBuilder;
034
035/**
036 * Splits an URI String into separate components.<p>
037 *
038 * An URI is splitted into a <code>prefix</code>, a <code>anchor</code> and a <code>query</code> part.
039 */
040public class CmsUriSplitter {
041
042    /** Empty (non null) StringBuffer constant. */
043    private static final StringBuffer EMPTY_BUFFER = new StringBuffer(0);
044
045    /** The anchor part of the URI, for example <code>someanchor</code>. */
046    private String m_anchor;
047
048    /** Indicates if 'strict' URI parsing did produce an error. */
049    private boolean m_errorFree;
050
051    /** Indicates if 'strict' URI parsing was used. */
052    private boolean m_isStrict;
053
054    /** The URI protocol, for example <code>http</code> or <code>https</code>. */
055    private String m_protocol;
056
057    /** The prefix part of the URI, for example <code>http://www.opencms.org/some/path/</code>. */
058    private String m_prefix;
059
060    /** The query part of the URI, for example <code>a=b&c=d</code>. */
061    private String m_query;
062
063    /** The suffix part of the uri. */
064    private String m_suffix;
065
066    /** The original URI String that was split. */
067    private String m_uri;
068
069    /**
070     * Creates a splitted URI using the strict parsing mode.<p>
071     *
072     * @param uri the URI to split
073     */
074    public CmsUriSplitter(String uri) {
075
076        this(uri, true);
077    }
078
079    /**
080     * Creates a splitted URI using the given parsing mode.<p>
081     *
082     * Using 'strict' parsing mode, all requirements for an URI are checked.
083     * If 'strict' is set to <code>false</code>, then only some simple parsing rules are applied,
084     * in which case the result may not be 100% valid (but still usable).
085     * If 'strict' parsing generates an error, then simple parsing is used as a fallback.<p>
086     *
087     * @param uri the URI to split
088     * @param strict if <code>true</code>, then 'strict' parsing mode is used, otherwise a relaxed URI parsing is done
089     */
090    public CmsUriSplitter(String uri, boolean strict) {
091
092        m_uri = uri;
093        m_errorFree = true;
094        m_isStrict = strict;
095        // use strict parsing
096        if (strict) {
097            try {
098                URI u = new URI(uri);
099                m_protocol = u.getScheme();
100                URI tempUri = new URIBuilder(u).setCustomQuery(null).setFragment(null).build();
101                m_prefix = tempUri.toASCIIString();
102                m_anchor = u.getRawFragment();
103                m_query = u.getRawQuery();
104            } catch (Exception exc) {
105                // may be thrown by URI constructor if URI is invalid
106                strict = false;
107                m_errorFree = false;
108            }
109        }
110        if (!strict && (uri != null)) {
111            // use simple parsing
112            StringBuffer prefix = new StringBuffer(uri.length());
113            StringBuffer query = EMPTY_BUFFER;
114            StringBuffer anchor = EMPTY_BUFFER;
115            int len = uri.length();
116            int cur = 0;
117            for (int i = 0; i < len; i++) {
118                char c = uri.charAt(i);
119                if ((cur == 0) && (c == ':')) {
120                    m_protocol = prefix.toString();
121                }
122                if (c == '#') { // always reset anchor buffer
123                    // start of anchor
124                    cur = 2;
125                    anchor = new StringBuffer(uri.length());
126                    continue;
127                }
128                if ((c == '?') && (cur != 2)) { // only reset query buffer if we're not already in the anchor
129                    cur = 1;
130                    query = new StringBuffer(uri.length());
131                    continue;
132                }
133                switch (cur) {
134                    case 1:
135                        // append to query
136                        query.append(c);
137                        break;
138                    case 2:
139                        // append to anchor
140                        anchor.append(c);
141                        break;
142                    default:
143                        // append to prefix
144                        prefix.append(c);
145                        break;
146                }
147            }
148
149            if (prefix.length() > 0) {
150                m_prefix = prefix.toString();
151            }
152            if (anchor.length() > 0) {
153                m_anchor = anchor.toString();
154            }
155            if (query.length() > 0) {
156                m_query = query.toString();
157            }
158        }
159
160    }
161
162    /**
163     * Checks if the given URI is well formed.<p>
164     *
165     * @param uri the URI to check
166     *
167     * @return <code>true</code> if the given URI is well formed
168     */
169    @SuppressWarnings("unused")
170    public static boolean isValidUri(String uri) {
171
172        boolean result = false;
173        try {
174            new URI(uri);
175            result = true;
176        } catch (Exception e) {
177            // nothing to do
178        }
179        return result;
180    }
181
182    /**
183     * @see java.lang.Object#equals(java.lang.Object)
184     */
185    @Override
186    public boolean equals(Object obj) {
187
188        if (obj == this) {
189            return true;
190        }
191        if (obj instanceof CmsUriSplitter) {
192            CmsUriSplitter other = (CmsUriSplitter)obj;
193            if (!((m_protocol == other.m_protocol) || ((m_protocol != null) && m_protocol.equals(other.m_protocol)))) {
194                return false;
195            }
196            if (!((m_prefix == other.m_prefix) || ((m_prefix != null) && m_prefix.equals(other.m_prefix)))) {
197                return false;
198            }
199            if (!((m_anchor == other.m_anchor) || ((m_anchor != null) && m_anchor.equals(other.m_anchor)))) {
200                return false;
201            }
202            if (!((m_query == other.m_query) || ((m_query != null) && m_query.equals(other.m_query)))) {
203                return false;
204            }
205            return true;
206        }
207        return false;
208    }
209
210    /**
211     * Returns the anchor part of the uri, for example <code>someanchor</code>,
212     * or <code>null</code> if no anchor is available.<p>
213     *
214     * @return the anchor part of the uri
215     */
216    public String getAnchor() {
217
218        return m_anchor;
219    }
220
221    /**
222     * Returns the prefix part of the uri, for example <code>http://www.opencms.org/some/path/</code>,
223     * or <code>null</code> if no prefix is available.<p>
224     *
225     * @return the prefix part of the uri
226     */
227    public String getPrefix() {
228
229        return m_prefix;
230    }
231
232    /**
233     * Returns the URI protocol, for example <code>http</code> or <code>https</code>.<p>
234     *
235     * @return the URI protocol
236     */
237    public String getProtocol() {
238
239        return m_protocol;
240    }
241
242    /**
243     * Returns the query part of the uri, for example <code>a=b&c=d</code>,
244     * or <code>null</code> if no query is available.<p>
245     *
246     * @return the query part of the uri
247     */
248    public String getQuery() {
249
250        return m_query;
251    }
252
253    /**
254     * Returns the suffix part of the uri, a combination of query and anchor,
255     * for example <code>?a=b&c=d#someanchor</code>,
256     * or the empty String if no suffix is available.<p>
257     *
258     * @return the suffix part of the uri
259     */
260    public String getSuffix() {
261
262        if (m_suffix == null) {
263            StringBuffer result = new StringBuffer();
264            if (m_query != null) {
265                result.append('?');
266                result.append(m_query);
267            }
268            if (m_anchor != null) {
269                result.append('#');
270                result.append(m_anchor);
271            }
272            m_suffix = result.toString();
273        }
274        return m_suffix;
275    }
276
277    /**
278     * Returns the URI String passed to this URI splitter.<p>
279     *
280     * @return the URI String passed to this URI splitter
281     */
282    public String getUri() {
283
284        return m_uri;
285    }
286
287    /**
288     * @see java.lang.Object#hashCode()
289     */
290    @Override
291    public int hashCode() {
292
293        int hashCode = 0;
294        if (m_prefix != null) {
295            hashCode += m_prefix.hashCode();
296        }
297        if (m_anchor != null) {
298            hashCode += m_anchor.hashCode();
299        }
300        if (m_query != null) {
301            hashCode += m_query.hashCode();
302        }
303        return hashCode;
304    }
305
306    /**
307     * Returns <code>true</code> if the URI was parsed error free in 'strict' mode,
308     * or if the simple mode was used.<p>
309     *
310     * @return <code>true</code> if the URI was parsed error free in 'strict' mode,
311     *      or if the simple mode was used
312     */
313    public boolean isErrorFree() {
314
315        return m_errorFree;
316    }
317
318    /**
319     * Returns an URI object created from the original input String.<p>
320     *
321     * This method will do a "best effort" to convert the original input String to a legal URI.
322     * Most notably, it will be able to handle original input Strings that contain a space " "
323     * and other usually illegal characters.<p>
324     *
325     * @return an URI object created from the original input String
326     *
327     * @throws URISyntaxException in case no URI object can be created from the original input String
328     */
329    public URI toURI() throws URISyntaxException {
330
331        if (m_isStrict && m_errorFree) {
332            // we have already verified that the URI contains no errors
333            return new URI(m_uri);
334        }
335        // create a new URI from the components
336        // using this constructor the input will be escaped if required
337        return new URI(null, m_prefix + (m_query != null ? "?" + m_query : ""), m_anchor);
338    }
339}