001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.relations; 029 030import org.opencms.db.CmsDbContext; 031import org.opencms.db.CmsDriverManager; 032import org.opencms.db.CmsPublishList; 033import org.opencms.file.CmsProject; 034import org.opencms.file.CmsResource; 035import org.opencms.file.CmsResourceFilter; 036import org.opencms.file.CmsVfsResourceNotFoundException; 037import org.opencms.file.types.I_CmsResourceType; 038import org.opencms.main.CmsException; 039import org.opencms.main.CmsIllegalStateException; 040import org.opencms.main.CmsLog; 041import org.opencms.main.OpenCms; 042import org.opencms.report.I_CmsReport; 043import org.opencms.util.CmsStringUtil; 044import org.opencms.workplace.threads.A_CmsProgressThread; 045 046import java.util.ArrayList; 047import java.util.HashMap; 048import java.util.Iterator; 049import java.util.List; 050import java.util.Map; 051 052import org.apache.commons.logging.Log; 053 054/** 055 * Validates relations of resources in the OpenCms VFS.<p> 056 * 057 * Relations are, for instance, href attribs in anchor tags and src attribs in 058 * image tags, as well as OpenCmsVfsFile values in Xml Content.<p> 059 * 060 * External links to targets outside the OpenCms VFS don't get validated.<p> 061 * 062 * Objects using this class are responsible to handle detected broken links.<p> 063 * 064 * @since 6.3.0 065 */ 066public class CmsRelationSystemValidator { 067 068 /** The log object for this class. */ 069 private static final Log LOG = CmsLog.getLog(CmsRelationSystemValidator.class); 070 071 /** The driver manager. */ 072 protected CmsDriverManager m_driverManager; 073 074 /** 075 * Default constructor.<p> 076 * 077 * @param driverManager The Cms driver manager 078 */ 079 public CmsRelationSystemValidator(CmsDriverManager driverManager) { 080 081 m_driverManager = driverManager; 082 } 083 084 /** 085 * Validates the relations against the online project.<p> 086 * 087 * The result is printed to the given report.<p> 088 * 089 * Validating references means to answer the question, whether 090 * we would have broken links in the online project if the given 091 * publish list would get published.<p> 092 * 093 * @param dbc the database context 094 * @param publishList the publish list to validate 095 * @param report a report to print messages 096 * 097 * @return a map with lists of invalid links 098 * (<code>{@link org.opencms.relations.CmsRelation}}</code> objects) 099 * keyed by root paths 100 * 101 * @throws Exception if something goes wrong 102 */ 103 public Map<String, List<CmsRelation>> validateResources( 104 CmsDbContext dbc, 105 CmsPublishList publishList, 106 I_CmsReport report) 107 throws Exception { 108 109 // check if progress should be set in the thread 110 A_CmsProgressThread thread = null; 111 if (Thread.currentThread() instanceof A_CmsProgressThread) { 112 thread = (A_CmsProgressThread)Thread.currentThread(); 113 } 114 115 Map<String, List<CmsRelation>> invalidResources = new HashMap<String, List<CmsRelation>>(); 116 boolean interProject = (publishList != null); 117 if (report != null) { 118 report.println( 119 Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_BEGIN_0), 120 I_CmsReport.FORMAT_HEADLINE); 121 } 122 List<CmsResource> resources = new ArrayList<CmsResource>(); 123 if (publishList == null) { 124 CmsResourceFilter filter = CmsResourceFilter.IGNORE_EXPIRATION; 125 List<I_CmsResourceType> resTypes = OpenCms.getResourceManager().getResourceTypes(); 126 Iterator<I_CmsResourceType> itTypes = resTypes.iterator(); 127 int count = 0; 128 while (itTypes.hasNext()) { 129 130 // set progress in thread (first 10 percent) 131 count++; 132 if (thread != null) { 133 134 if (thread.isInterrupted()) { 135 throw new CmsIllegalStateException( 136 org.opencms.workplace.commons.Messages.get().container( 137 org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0)); 138 } 139 thread.setProgress((count * 10) / resTypes.size()); 140 } 141 142 I_CmsResourceType type = itTypes.next(); 143 if (type instanceof I_CmsLinkParseable) { 144 filter = filter.addRequireType(type.getTypeId()); 145 try { 146 resources.addAll( 147 m_driverManager.readResources( 148 dbc, 149 m_driverManager.readResource(dbc, "/", filter), 150 filter, 151 true)); 152 } catch (CmsException e) { 153 LOG.error( 154 Messages.get().getBundle().key(Messages.LOG_RETRIEVAL_RESOURCES_1, type.getTypeName()), 155 e); 156 } 157 } 158 } 159 } else { 160 resources.addAll(publishList.getAllResources()); 161 } 162 163 // populate a lookup map with the project resources that 164 // actually get published keyed by their resource names. 165 // second, resources that don't get validated are ignored. 166 Map<String, CmsResource> offlineFilesLookup = new HashMap<String, CmsResource>(); 167 Iterator<CmsResource> itResources = resources.iterator(); 168 int count = 0; 169 while (itResources.hasNext()) { 170 171 // set progress in thread (next 10 percent) 172 count++; 173 if (thread != null) { 174 175 if (thread.isInterrupted()) { 176 throw new CmsIllegalStateException( 177 org.opencms.workplace.commons.Messages.get().container( 178 org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0)); 179 } 180 thread.setProgress(((count * 10) / resources.size()) + 10); 181 } 182 183 CmsResource resource = itResources.next(); 184 offlineFilesLookup.put(resource.getRootPath(), resource); 185 offlineFilesLookup.put(resource.getStructureId().toString(), resource); 186 } 187 CmsProject project = dbc.currentProject(); 188 if (interProject) { 189 try { 190 project = m_driverManager.readProject(dbc, CmsProject.ONLINE_PROJECT_ID); 191 } catch (CmsException e) { 192 // should never happen 193 LOG.error(e.getLocalizedMessage(), e); 194 } 195 } 196 197 boolean foundBrokenLinks = false; 198 for (int index = 0, size = resources.size(); index < size; index++) { 199 200 // set progress in thread (next 20 percent; leave rest for creating the list and the html) 201 if (thread != null) { 202 203 if (thread.isInterrupted()) { 204 throw new CmsIllegalStateException( 205 org.opencms.workplace.commons.Messages.get().container( 206 org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0)); 207 } 208 thread.setProgress(((index * 20) / resources.size()) + 20); 209 } 210 211 CmsResource resource = resources.get(index); 212 String resourceName = resource.getRootPath(); 213 214 if (report != null) { 215 report.print( 216 org.opencms.report.Messages.get().container( 217 org.opencms.report.Messages.RPT_SUCCESSION_2, 218 Integer.valueOf(index + 1), 219 Integer.valueOf(size)), 220 I_CmsReport.FORMAT_NOTE); 221 report.print(Messages.get().container(Messages.RPT_HTMLLINK_VALIDATING_0), I_CmsReport.FORMAT_NOTE); 222 report.print( 223 org.opencms.report.Messages.get().container( 224 org.opencms.report.Messages.RPT_ARGUMENT_1, 225 dbc.removeSiteRoot(resourceName))); 226 report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0)); 227 } 228 List<CmsRelation> brokenLinks = validateLinks(dbc, resource, offlineFilesLookup, project, report); 229 if (brokenLinks.size() > 0) { 230 // the resource contains broken links 231 invalidResources.put(resourceName, brokenLinks); 232 foundBrokenLinks = true; 233 } else { 234 // the resource contains *NO* broken links 235 if (report != null) { 236 report.println( 237 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0), 238 I_CmsReport.FORMAT_OK); 239 } 240 } 241 } 242 243 if (foundBrokenLinks) { 244 // print a summary if we found broken links in the validated resources 245 if (report != null) { 246 report.println( 247 Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_ERROR_0), 248 I_CmsReport.FORMAT_ERROR); 249 } 250 } 251 if (report != null) { 252 report.println( 253 Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_END_0), 254 I_CmsReport.FORMAT_HEADLINE); 255 } 256 return invalidResources; 257 } 258 259 /** 260 * Checks a link to a resource which has been deleted.<p> 261 * @param relation 262 * 263 * @param link the URI of the resource which has a link to the deleted resource 264 * @param fileLookup a lookup table of files to be published 265 * @param relationTargets 266 * 267 * @return true if the resource which has a link to the deleted resource is also going to be deleted 268 */ 269 protected boolean checkLinkForDeletedLinkTarget( 270 CmsRelation relation, 271 String link, 272 Map<String, CmsResource> fileLookup) { 273 274 boolean isValidLink = false; 275 // since we are going to delete the resource 276 // check if the linked resource is also to be deleted 277 if (fileLookup.containsKey(link) || fileLookup.containsKey(relation.getSourceId().toString())) { 278 // Technically, if the relation source is going to be published too and is not deleted, the link is not valid. But in that case, validateLinks will also be called for that resource and detect broken the broken link there. 279 isValidLink = true; 280 } 281 return isValidLink; 282 } 283 284 /** 285 * Checks a link from a resource which has changed.<p> 286 * 287 * @param dbc the current dbc 288 * @param resource the link source 289 * @param relation the relation 290 * @param link the link target 291 * @param project the current project 292 * @param fileLookup a lookup table which contains the files which are going to be published 293 * 294 * @return true if the link will be valid after publishing 295 */ 296 protected boolean checkLinkForNewOrChangedLinkSource( 297 CmsDbContext dbc, 298 CmsResource resource, 299 CmsRelation relation, 300 String link, 301 CmsProject project, 302 Map<String, CmsResource> fileLookup) { 303 304 boolean isValidLink = true; 305 // the link is valid... 306 try { 307 // ... if the linked resource exists in the online project 308 // search the target of link in the online project 309 try { 310 link = m_driverManager.getVfsDriver( 311 dbc).readResource(dbc, project.getUuid(), relation.getTargetId(), true).getRootPath(); 312 } catch (CmsVfsResourceNotFoundException e) { 313 // reading by id failed, this means that the link variable still equals relation.getTargetPath() 314 if (LOG.isDebugEnabled()) { 315 LOG.debug( 316 Messages.get().getBundle().key( 317 Messages.LOG_LINK_VALIDATION_READBYID_FAILED_2, 318 relation.getTargetId().toString(), 319 project.getName()), 320 e); 321 } 322 m_driverManager.getVfsDriver(dbc).readResource(dbc, project.getUuid(), relation.getTargetPath(), true); 323 } 324 } catch (CmsException e) { 325 // ... or if the linked resource is a resource that gets actually published 326 if (LOG.isDebugEnabled()) { 327 LOG.debug( 328 Messages.get().getBundle().key( 329 Messages.LOG_LINK_VALIDATION_READBYPATH_FAILED_2, 330 relation.getTargetPath(), 331 project.getName()), 332 e); 333 } 334 if (!fileLookup.containsKey(link)) { 335 isValidLink = false; 336 } 337 } finally { 338 // ... and if the linked resource to be published get deleted 339 if (fileLookup.containsKey(link)) { 340 CmsResource offlineResource = fileLookup.get(link); 341 if (offlineResource.getState().isDeleted()) { 342 if (LOG.isDebugEnabled()) { 343 LOG.debug(Messages.get().getBundle().key(Messages.LOG_LINK_VALIDATION_RESOURCEDELETED_1, link)); 344 } 345 isValidLink = false; 346 } 347 } 348 } 349 return isValidLink; 350 } 351 352 /** 353 * Validates the links for the specified resource.<p> 354 * 355 * @param dbc the database context 356 * @param resource the resource that will be validated 357 * @param fileLookup a map for faster lookup with all resources keyed by their rootpath 358 * @param project the project to validate 359 * @param report the report to write to 360 * 361 * @return a list with the broken links as {@link CmsRelation} objects for the specified resource, 362 * or an empty list if no broken links were found 363 */ 364 protected List<CmsRelation> validateLinks( 365 CmsDbContext dbc, 366 CmsResource resource, 367 Map<String, CmsResource> fileLookup, 368 CmsProject project, 369 I_CmsReport report) { 370 371 List<CmsRelation> brokenRelations = new ArrayList<CmsRelation>(); 372 Map<String, Boolean> validatedLinks = new HashMap<String, Boolean>(); 373 374 // get the relations 375 List<CmsRelation> incomingRelationsOnline = new ArrayList<CmsRelation>(); 376 List<CmsRelation> outgoingRelationsOffline = new ArrayList<CmsRelation>(); 377 try { 378 if (!resource.getState().isDeleted()) { 379 // search the target of links in the current (offline) project 380 outgoingRelationsOffline = m_driverManager.getRelationsForResource( 381 dbc, 382 resource, 383 CmsRelationFilter.TARGETS); 384 } else { 385 // search the source of links in the online project 386 CmsProject currentProject = dbc.currentProject(); 387 dbc.getRequestContext().setCurrentProject(project); 388 try { 389 incomingRelationsOnline = m_driverManager.getRelationsForResource( 390 dbc, 391 resource, 392 CmsRelationFilter.SOURCES); 393 } finally { 394 dbc.getRequestContext().setCurrentProject(currentProject); 395 } 396 } 397 } catch (CmsException e) { 398 LOG.error(Messages.get().getBundle().key(Messages.LOG_LINK_SEARCH_1, resource), e); 399 if (report != null) { 400 report.println( 401 Messages.get().container(Messages.LOG_LINK_SEARCH_1, dbc.removeSiteRoot(resource.getRootPath())), 402 I_CmsReport.FORMAT_ERROR); 403 } 404 return brokenRelations; 405 } 406 407 List<CmsRelation> relations = new ArrayList<CmsRelation>(); 408 relations.addAll(incomingRelationsOnline); 409 relations.addAll(outgoingRelationsOffline); 410 // check the relations 411 boolean first = true; 412 Iterator<CmsRelation> itRelations = relations.iterator(); 413 while (itRelations.hasNext()) { 414 CmsRelation relation = itRelations.next(); 415 String link; 416 if (!resource.getState().isDeleted()) { 417 link = relation.getTargetPath(); 418 } else { 419 link = relation.getSourcePath(); 420 } 421 if (CmsStringUtil.isEmptyOrWhitespaceOnly(link)) { 422 // skip empty links 423 continue; 424 } 425 if (validatedLinks.keySet().contains(link)) { 426 // skip already validated links 427 if (validatedLinks.get(link).booleanValue()) { 428 // add broken relation of different type 429 brokenRelations.add(relation); 430 } 431 continue; 432 } 433 boolean result; 434 if (resource.getState().isDeleted()) { 435 result = checkLinkForDeletedLinkTarget(relation, link, fileLookup); 436 } else { 437 result = checkLinkForNewOrChangedLinkSource(dbc, resource, relation, link, project, fileLookup); 438 439 } 440 boolean isValidLink = result; 441 if (!isValidLink) { 442 if (first) { 443 if (report != null) { 444 report.println( 445 Messages.get().container(Messages.RPT_HTMLLINK_FOUND_BROKEN_LINKS_0), 446 I_CmsReport.FORMAT_WARNING); 447 } 448 first = false; 449 } 450 brokenRelations.add(relation); 451 if (report != null) { 452 if (!resource.getState().isDeleted()) { 453 report.println( 454 Messages.get().container( 455 Messages.RPT_HTMLLINK_BROKEN_TARGET_2, 456 relation.getSourcePath(), 457 dbc.removeSiteRoot(link)), 458 I_CmsReport.FORMAT_WARNING); 459 } else { 460 report.println( 461 Messages.get().container( 462 Messages.RPT_HTMLLINK_BROKEN_SOURCE_2, 463 dbc.removeSiteRoot(link), 464 relation.getTargetPath()), 465 I_CmsReport.FORMAT_WARNING); 466 } 467 } 468 } 469 validatedLinks.put(link, Boolean.valueOf(!isValidLink)); 470 } 471 return brokenRelations; 472 } 473 474}