001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.relations;
029
030import org.opencms.file.CmsFile;
031import org.opencms.file.CmsObject;
032import org.opencms.file.CmsResource;
033import org.opencms.file.CmsResourceFilter;
034import org.opencms.file.types.CmsResourceTypePointer;
035import org.opencms.main.CmsException;
036import org.opencms.main.CmsLog;
037import org.opencms.main.OpenCms;
038import org.opencms.report.CmsLogReport;
039import org.opencms.report.I_CmsReport;
040import org.opencms.scheduler.I_CmsScheduledJob;
041import org.opencms.util.CmsUriSplitter;
042
043import java.net.HttpURLConnection;
044import java.net.MalformedURLException;
045import java.net.URI;
046import java.net.URISyntaxException;
047import java.net.URL;
048import java.security.SecureRandom;
049import java.security.cert.X509Certificate;
050import java.util.HashMap;
051import java.util.Iterator;
052import java.util.List;
053import java.util.Map;
054
055import javax.net.ssl.HostnameVerifier;
056import javax.net.ssl.HttpsURLConnection;
057import javax.net.ssl.SSLContext;
058import javax.net.ssl.SSLSession;
059import javax.net.ssl.TrustManager;
060import javax.net.ssl.X509TrustManager;
061
062import org.apache.commons.logging.Log;
063
064/**
065 * Class to validate pointer links.<p>
066 *
067 * @since 6.0.0
068 */
069public class CmsExternalLinksValidator implements I_CmsScheduledJob {
070
071    /** The log object for this class. */
072    private static final Log LOG = CmsLog.getLog(CmsExternalLinksValidator.class);
073
074    /** The report for the output. */
075    private I_CmsReport m_report;
076
077    /**
078     * Checks if the given url is valid.<p>
079     *
080     * @param check the url to check
081     * @param cms a OpenCms context object
082     *
083     * @return false if the url could not be accessed
084     */
085    public static boolean checkUrl(CmsObject cms, String check) {
086
087        // first, create an URI from the string representation
088        URI uri = null;
089        try {
090            uri = new CmsUriSplitter(check, true).toURI();
091        } catch (URISyntaxException exc) {
092            return false;
093        }
094        try {
095            if (!uri.isAbsolute()) {
096                return cms.existsResource(cms.getRequestContext().removeSiteRoot(uri.getPath()));
097            }
098            URL url = uri.toURL();
099            String protocol = url.getProtocol();
100            if ("http".equals(protocol) || "https".equals(protocol)) {
101                // ensure that file is encoded properly
102                HttpURLConnection httpcon = (HttpURLConnection)url.openConnection();
103                adjustConnection(httpcon);
104                int responseCode = httpcon.getResponseCode();
105                // accepting all status codes 2xx success and 3xx - redirect
106                return ((responseCode >= 200) && (responseCode < 400));
107            }
108            return true;
109        } catch (MalformedURLException mue) {
110            return false;
111        } catch (Exception ex) {
112            return false;
113        }
114    }
115
116    /**
117     * Adjust the connection to retrieve the newsletter. Currently, SSL-Certificate verification can be disabled.
118     *
119     * @param con the connection to manipulate
120     */
121    private static void adjustConnection(HttpURLConnection con) {
122
123        con.setConnectTimeout(2000); // wait at most two second for the connection
124        con.setReadTimeout(8000); // wait at most 8 seconds for reading
125        if (con instanceof HttpsURLConnection) {
126            HttpsURLConnection httpsUrlConnection = (HttpsURLConnection)con;
127            // Create a trust manager that does not validate certificate chains
128            TrustManager[] trustAllCerts = new TrustManager[] {new X509TrustManager() {
129
130                @Override
131                public void checkClientTrusted(X509Certificate[] certs, String authType) {
132
133                    // do nothing
134                }
135
136                @Override
137                public void checkServerTrusted(X509Certificate[] certs, String authType) {
138
139                    // do nothing
140                }
141
142                @Override
143                public X509Certificate[] getAcceptedIssuers() {
144
145                    return null;
146                }
147            }};
148
149            // Set the all-trusting trust manager for the connection
150            try {
151                SSLContext sc = SSLContext.getInstance("TLS");
152                sc.init(null, trustAllCerts, new SecureRandom());
153                httpsUrlConnection.setSSLSocketFactory(sc.getSocketFactory());
154            } catch (Exception e) {
155                LOG.warn(e, e);
156            }
157
158            // do not verify hostnames
159            httpsUrlConnection.setHostnameVerifier(new HostnameVerifier() {
160
161                @Override
162                public boolean verify(String arg0, SSLSession arg1) {
163
164                    return true;
165                }
166            });
167        }
168
169    }
170
171    /**
172     * This method is called by the cron scheduler.<p>
173     *
174     * @param cms a OpenCms context object
175     * @param parameters link check parameters
176     * @return the String that is written to the OpenCms log
177     * @throws CmsException if something goes wrong
178     */
179    public String launch(CmsObject cms, Map<String, String> parameters) throws CmsException {
180
181        if (Boolean.valueOf(parameters.get("writeLog")).booleanValue()) {
182            m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class);
183        }
184        validateLinks(cms);
185        return "CmsExternLinkValidator.launch(): Links checked.";
186    }
187
188    /**
189     * Sets the report for the output.<p>
190     *
191     * @param report the report for the output
192     */
193    public void setReport(I_CmsReport report) {
194
195        m_report = report;
196    }
197
198    /**
199     * Validate all links.<p>
200     *
201     * @param cms a OpenCms context object
202     *
203     * @throws CmsException if something goes wrong
204     */
205    @SuppressWarnings("deprecation")
206    public void validateLinks(CmsObject cms) throws CmsException {
207
208        if (m_report == null) {
209            m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class);
210        }
211
212        m_report.println(
213            Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_BEGIN_0),
214            I_CmsReport.FORMAT_HEADLINE);
215
216        // get all links
217
218        int pointerId = OpenCms.getResourceManager().getResourceType(
219            CmsResourceTypePointer.getStaticTypeName()).getTypeId();
220        CmsResourceFilter filter = CmsResourceFilter.ONLY_VISIBLE_NO_DELETED.addRequireType(pointerId);
221        List<CmsResource> links = cms.readResources("/", filter);
222        Iterator<CmsResource> iterator = links.iterator();
223        Map<String, String> brokenLinks = new HashMap<String, String>();
224
225        for (int i = 1; iterator.hasNext(); i++) {
226            CmsFile link = cms.readFile(cms.getSitePath(iterator.next()), filter);
227            String linkUrl = new String(link.getContents());
228
229            // print to the report
230            m_report.print(
231                org.opencms.report.Messages.get().container(
232                    org.opencms.report.Messages.RPT_SUCCESSION_1,
233                    Integer.valueOf(i),
234                    Integer.valueOf(links.size())),
235                I_CmsReport.FORMAT_NOTE);
236            m_report.print(Messages.get().container(Messages.RPT_VALIDATE_LINK_0), I_CmsReport.FORMAT_NOTE);
237            m_report.print(
238                org.opencms.report.Messages.get().container(
239                    org.opencms.report.Messages.RPT_ARGUMENT_1,
240                    link.getRootPath()));
241            m_report.print(Messages.get().container(Messages.GUI_LINK_POINTING_TO_0), I_CmsReport.FORMAT_NOTE);
242            m_report.print(
243                org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_ARGUMENT_1, linkUrl));
244            m_report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0));
245
246            // check link and append it to the list of broken links, if test fails
247            if (!checkUrl(cms, linkUrl)) {
248                brokenLinks.put(link.getRootPath(), linkUrl);
249                m_report.println(Messages.get().container(Messages.RPT_BROKEN_0), I_CmsReport.FORMAT_ERROR);
250            } else {
251                m_report.println(
252                    org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0),
253                    I_CmsReport.FORMAT_OK);
254            }
255        }
256
257        m_report.println(
258            Messages.get().container(
259                Messages.RPT_LINK_VALIDATION_STAT_2,
260                Integer.valueOf(links.size()),
261                Integer.valueOf(brokenLinks.size())),
262            I_CmsReport.FORMAT_HEADLINE);
263        m_report.println(
264            Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_END_0),
265            I_CmsReport.FORMAT_HEADLINE);
266
267        OpenCms.getLinkManager().setPointerLinkValidationResult(new CmsExternalLinksValidationResult(brokenLinks));
268    }
269
270}