001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.relations; 029 030import org.opencms.file.CmsFile; 031import org.opencms.file.CmsObject; 032import org.opencms.file.CmsResource; 033import org.opencms.file.CmsResourceFilter; 034import org.opencms.file.types.CmsResourceTypePointer; 035import org.opencms.main.CmsException; 036import org.opencms.main.OpenCms; 037import org.opencms.report.CmsLogReport; 038import org.opencms.report.I_CmsReport; 039import org.opencms.scheduler.I_CmsScheduledJob; 040import org.opencms.util.CmsUriSplitter; 041 042import java.net.HttpURLConnection; 043import java.net.MalformedURLException; 044import java.net.URI; 045import java.net.URISyntaxException; 046import java.net.URL; 047import java.util.HashMap; 048import java.util.Iterator; 049import java.util.List; 050import java.util.Map; 051 052/** 053 * Class to validate pointer links.<p> 054 * 055 * @since 6.0.0 056 */ 057public class CmsExternalLinksValidator implements I_CmsScheduledJob { 058 059 /** The report for the output. */ 060 private I_CmsReport m_report; 061 062 /** 063 * Checks if the given url is valid.<p> 064 * 065 * @param check the url to check 066 * @param cms a OpenCms context object 067 * 068 * @return false if the url could not be accessed 069 */ 070 public static boolean checkUrl(CmsObject cms, String check) { 071 072 // first, create an URI from the string representation 073 URI uri = null; 074 try { 075 uri = new CmsUriSplitter(check, true).toURI(); 076 } catch (URISyntaxException exc) { 077 return false; 078 } 079 try { 080 if (!uri.isAbsolute()) { 081 return cms.existsResource(cms.getRequestContext().removeSiteRoot(uri.getPath())); 082 } else { 083 URL url = uri.toURL(); 084 if ("http".equals(url.getProtocol())) { 085 // ensure that file is encoded properly 086 HttpURLConnection httpcon = (HttpURLConnection)url.openConnection(); 087 int responseCode = httpcon.getResponseCode(); 088 // accepting all status codes 2xx success and 3xx - redirect 089 return ((responseCode >= 200) && (responseCode < 400)); 090 } else { 091 return true; 092 } 093 } 094 } catch (MalformedURLException mue) { 095 return false; 096 } catch (Exception ex) { 097 return false; 098 } 099 } 100 101 /** 102 * This method is called by the cron scheduler.<p> 103 * 104 * @param cms a OpenCms context object 105 * @param parameters link check parameters 106 * @return the String that is written to the OpenCms log 107 * @throws CmsException if something goes wrong 108 */ 109 public String launch(CmsObject cms, Map<String, String> parameters) throws CmsException { 110 111 if (Boolean.valueOf(parameters.get("writeLog")).booleanValue()) { 112 m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class); 113 } 114 validateLinks(cms); 115 return "CmsExternLinkValidator.launch(): Links checked."; 116 } 117 118 /** 119 * Sets the report for the output.<p> 120 * 121 * @param report the report for the output 122 */ 123 public void setReport(I_CmsReport report) { 124 125 m_report = report; 126 } 127 128 /** 129 * Validate all links.<p> 130 * 131 * @param cms a OpenCms context object 132 * 133 * @throws CmsException if something goes wrong 134 */ 135 @SuppressWarnings("deprecation") 136 public void validateLinks(CmsObject cms) throws CmsException { 137 138 if (m_report == null) { 139 m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class); 140 } 141 142 m_report.println( 143 Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_BEGIN_0), 144 I_CmsReport.FORMAT_HEADLINE); 145 146 // get all links 147 148 int pointerId = OpenCms.getResourceManager().getResourceType( 149 CmsResourceTypePointer.getStaticTypeName()).getTypeId(); 150 CmsResourceFilter filter = CmsResourceFilter.ONLY_VISIBLE_NO_DELETED.addRequireType(pointerId); 151 List<CmsResource> links = cms.readResources("/", filter); 152 Iterator<CmsResource> iterator = links.iterator(); 153 Map<String, String> brokenLinks = new HashMap<String, String>(); 154 155 for (int i = 1; iterator.hasNext(); i++) { 156 CmsFile link = cms.readFile(cms.getSitePath(iterator.next()), filter); 157 String linkUrl = new String(link.getContents()); 158 159 // print to the report 160 m_report.print( 161 org.opencms.report.Messages.get().container( 162 org.opencms.report.Messages.RPT_SUCCESSION_1, 163 Integer.valueOf(i), 164 Integer.valueOf(links.size())), 165 I_CmsReport.FORMAT_NOTE); 166 m_report.print(Messages.get().container(Messages.RPT_VALIDATE_LINK_0), I_CmsReport.FORMAT_NOTE); 167 m_report.print( 168 org.opencms.report.Messages.get().container( 169 org.opencms.report.Messages.RPT_ARGUMENT_1, 170 link.getRootPath())); 171 m_report.print(Messages.get().container(Messages.GUI_LINK_POINTING_TO_0), I_CmsReport.FORMAT_NOTE); 172 m_report.print( 173 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_ARGUMENT_1, linkUrl)); 174 m_report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0)); 175 176 // check link and append it to the list of broken links, if test fails 177 if (!checkUrl(cms, linkUrl)) { 178 brokenLinks.put(link.getRootPath(), linkUrl); 179 m_report.println(Messages.get().container(Messages.RPT_BROKEN_0), I_CmsReport.FORMAT_ERROR); 180 } else { 181 m_report.println( 182 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0), 183 I_CmsReport.FORMAT_OK); 184 } 185 } 186 187 m_report.println( 188 Messages.get().container( 189 Messages.RPT_LINK_VALIDATION_STAT_2, 190 Integer.valueOf(links.size()), 191 Integer.valueOf(brokenLinks.size())), 192 I_CmsReport.FORMAT_HEADLINE); 193 m_report.println( 194 Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_END_0), 195 I_CmsReport.FORMAT_HEADLINE); 196 197 OpenCms.getLinkManager().setPointerLinkValidationResult(new CmsExternalLinksValidationResult(brokenLinks)); 198 } 199}