001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.relations; 029 030import org.opencms.file.CmsFile; 031import org.opencms.file.CmsObject; 032import org.opencms.file.CmsResource; 033import org.opencms.file.CmsResourceFilter; 034import org.opencms.file.types.CmsResourceTypePointer; 035import org.opencms.main.CmsException; 036import org.opencms.main.CmsLog; 037import org.opencms.main.OpenCms; 038import org.opencms.report.CmsLogReport; 039import org.opencms.report.I_CmsReport; 040import org.opencms.scheduler.I_CmsScheduledJob; 041import org.opencms.util.CmsUriSplitter; 042 043import java.net.HttpURLConnection; 044import java.net.MalformedURLException; 045import java.net.URI; 046import java.net.URISyntaxException; 047import java.net.URL; 048import java.security.SecureRandom; 049import java.security.cert.X509Certificate; 050import java.util.HashMap; 051import java.util.Iterator; 052import java.util.List; 053import java.util.Map; 054 055import javax.net.ssl.HostnameVerifier; 056import javax.net.ssl.HttpsURLConnection; 057import javax.net.ssl.SSLContext; 058import javax.net.ssl.SSLSession; 059import javax.net.ssl.TrustManager; 060import javax.net.ssl.X509TrustManager; 061 062import org.apache.commons.logging.Log; 063 064/** 065 * Class to validate pointer links.<p> 066 * 067 * @since 6.0.0 068 */ 069public class CmsExternalLinksValidator implements I_CmsScheduledJob { 070 071 /** The log object for this class. */ 072 private static final Log LOG = CmsLog.getLog(CmsExternalLinksValidator.class); 073 074 /** The report for the output. */ 075 private I_CmsReport m_report; 076 077 /** 078 * Checks if the given url is valid.<p> 079 * 080 * @param check the url to check 081 * @param cms a OpenCms context object 082 * 083 * @return false if the url could not be accessed 084 */ 085 public static boolean checkUrl(CmsObject cms, String check) { 086 087 // first, create an URI from the string representation 088 URI uri = null; 089 try { 090 uri = new CmsUriSplitter(check, true).toURI(); 091 } catch (URISyntaxException exc) { 092 return false; 093 } 094 try { 095 if (!uri.isAbsolute()) { 096 return cms.existsResource(cms.getRequestContext().removeSiteRoot(uri.getPath())); 097 } 098 URL url = uri.toURL(); 099 String protocol = url.getProtocol(); 100 if ("http".equals(protocol) || "https".equals(protocol)) { 101 // ensure that file is encoded properly 102 HttpURLConnection httpcon = (HttpURLConnection)url.openConnection(); 103 adjustConnection(httpcon); 104 int responseCode = httpcon.getResponseCode(); 105 // accepting all status codes 2xx success and 3xx - redirect 106 return ((responseCode >= 200) && (responseCode < 400)); 107 } 108 return true; 109 } catch (MalformedURLException mue) { 110 return false; 111 } catch (Exception ex) { 112 return false; 113 } 114 } 115 116 /** 117 * Adjust the connection to retrieve the newsletter. Currently, SSL-Certificate verification can be disabled. 118 * 119 * @param con the connection to manipulate 120 */ 121 private static void adjustConnection(HttpURLConnection con) { 122 123 con.setConnectTimeout(2000); // wait at most two second for the connection 124 con.setReadTimeout(8000); // wait at most 8 seconds for reading 125 if (con instanceof HttpsURLConnection) { 126 HttpsURLConnection httpsUrlConnection = (HttpsURLConnection)con; 127 // Create a trust manager that does not validate certificate chains 128 TrustManager[] trustAllCerts = new TrustManager[] {new X509TrustManager() { 129 130 @Override 131 public void checkClientTrusted(X509Certificate[] certs, String authType) { 132 133 // do nothing 134 } 135 136 @Override 137 public void checkServerTrusted(X509Certificate[] certs, String authType) { 138 139 // do nothing 140 } 141 142 @Override 143 public X509Certificate[] getAcceptedIssuers() { 144 145 return null; 146 } 147 }}; 148 149 // Set the all-trusting trust manager for the connection 150 try { 151 SSLContext sc = SSLContext.getInstance("TLS"); 152 sc.init(null, trustAllCerts, new SecureRandom()); 153 httpsUrlConnection.setSSLSocketFactory(sc.getSocketFactory()); 154 } catch (Exception e) { 155 LOG.warn(e, e); 156 } 157 158 // do not verify hostnames 159 httpsUrlConnection.setHostnameVerifier(new HostnameVerifier() { 160 161 @Override 162 public boolean verify(String arg0, SSLSession arg1) { 163 164 return true; 165 } 166 }); 167 } 168 169 } 170 171 /** 172 * This method is called by the cron scheduler.<p> 173 * 174 * @param cms a OpenCms context object 175 * @param parameters link check parameters 176 * @return the String that is written to the OpenCms log 177 * @throws CmsException if something goes wrong 178 */ 179 public String launch(CmsObject cms, Map<String, String> parameters) throws CmsException { 180 181 if (Boolean.valueOf(parameters.get("writeLog")).booleanValue()) { 182 m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class); 183 } 184 validateLinks(cms); 185 return "CmsExternLinkValidator.launch(): Links checked."; 186 } 187 188 /** 189 * Sets the report for the output.<p> 190 * 191 * @param report the report for the output 192 */ 193 public void setReport(I_CmsReport report) { 194 195 m_report = report; 196 } 197 198 /** 199 * Validate all links.<p> 200 * 201 * @param cms a OpenCms context object 202 * 203 * @throws CmsException if something goes wrong 204 */ 205 @SuppressWarnings("deprecation") 206 public void validateLinks(CmsObject cms) throws CmsException { 207 208 if (m_report == null) { 209 m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class); 210 } 211 212 m_report.println( 213 Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_BEGIN_0), 214 I_CmsReport.FORMAT_HEADLINE); 215 216 // get all links 217 218 int pointerId = OpenCms.getResourceManager().getResourceType( 219 CmsResourceTypePointer.getStaticTypeName()).getTypeId(); 220 CmsResourceFilter filter = CmsResourceFilter.ONLY_VISIBLE_NO_DELETED.addRequireType(pointerId); 221 List<CmsResource> links = cms.readResources("/", filter); 222 Iterator<CmsResource> iterator = links.iterator(); 223 Map<String, String> brokenLinks = new HashMap<String, String>(); 224 225 for (int i = 1; iterator.hasNext(); i++) { 226 CmsFile link = cms.readFile(cms.getSitePath(iterator.next()), filter); 227 String linkUrl = new String(link.getContents()); 228 229 // print to the report 230 m_report.print( 231 org.opencms.report.Messages.get().container( 232 org.opencms.report.Messages.RPT_SUCCESSION_1, 233 Integer.valueOf(i), 234 Integer.valueOf(links.size())), 235 I_CmsReport.FORMAT_NOTE); 236 m_report.print(Messages.get().container(Messages.RPT_VALIDATE_LINK_0), I_CmsReport.FORMAT_NOTE); 237 m_report.print( 238 org.opencms.report.Messages.get().container( 239 org.opencms.report.Messages.RPT_ARGUMENT_1, 240 link.getRootPath())); 241 m_report.print(Messages.get().container(Messages.GUI_LINK_POINTING_TO_0), I_CmsReport.FORMAT_NOTE); 242 m_report.print( 243 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_ARGUMENT_1, linkUrl)); 244 m_report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0)); 245 246 // check link and append it to the list of broken links, if test fails 247 if (!checkUrl(cms, linkUrl)) { 248 brokenLinks.put(link.getRootPath(), linkUrl); 249 m_report.println(Messages.get().container(Messages.RPT_BROKEN_0), I_CmsReport.FORMAT_ERROR); 250 } else { 251 m_report.println( 252 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0), 253 I_CmsReport.FORMAT_OK); 254 } 255 } 256 257 m_report.println( 258 Messages.get().container( 259 Messages.RPT_LINK_VALIDATION_STAT_2, 260 Integer.valueOf(links.size()), 261 Integer.valueOf(brokenLinks.size())), 262 I_CmsReport.FORMAT_HEADLINE); 263 m_report.println( 264 Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_END_0), 265 I_CmsReport.FORMAT_HEADLINE); 266 267 OpenCms.getLinkManager().setPointerLinkValidationResult(new CmsExternalLinksValidationResult(brokenLinks)); 268 } 269 270}