/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.relations;

import org.opencms.db.CmsDbContext;
import org.opencms.db.CmsDriverManager;
import org.opencms.db.CmsPublishList;
import org.opencms.file.CmsProject;
import org.opencms.file.CmsResource;
import org.opencms.file.CmsResourceFilter;
import org.opencms.file.CmsVfsResourceNotFoundException;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.main.CmsException;
import org.opencms.main.CmsIllegalStateException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.report.I_CmsReport;
import org.opencms.util.CmsStringUtil;
import org.opencms.workplace.threads.A_CmsProgressThread;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;

import com.google.common.collect.HashMultimap;

/**
 * Validates relations of resources in the OpenCms VFS.<p>
 *
 * Relations are, for instance, href attribs in anchor tags and src attribs in
 * image tags, as well as OpenCmsVfsFile values in Xml Content.<p>
 *
 * External links to targets outside the OpenCms VFS don't get validated.<p>
 *
 * Objects using this class are responsible for handling detected broken links.<p>
 *
 * @since 6.3.0
 */
public class CmsRelationSystemValidator {

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsRelationSystemValidator.class);

    /** The driver manager. */
    protected CmsDriverManager m_driverManager;

    /**
     * Default constructor.<p>
     *
     * @param driverManager The Cms driver manager
     */
    public CmsRelationSystemValidator(CmsDriverManager driverManager) {

        m_driverManager = driverManager;
    }

    /**
     * Reports the given progress to the progress thread, if there is one.<p>
     *
     * Does nothing if no progress thread is given.<p>
     *
     * @param thread the progress thread to update, or <code>null</code> if the validation does not run in one
     * @param progress the progress value to set
     *
     * @throws CmsIllegalStateException if the given thread has been interrupted
     */
    private static void updateProgress(A_CmsProgressThread thread, int progress) {

        if (thread == null) {
            return;
        }
        if (thread.isInterrupted()) {
            throw new CmsIllegalStateException(
                org.opencms.workplace.commons.Messages.get().container(
                    org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0));
        }
        thread.setProgress(progress);
    }

    /**
     * Validates the relations against the online project.<p>
     *
     * The result is printed to the given report.<p>
     *
     * Validating references means to answer the question, whether
     * we would have broken links in the online project if the given
     * publish list would get published.<p>
     *
     * If no publish list is given, all resources of a link parseable type
     * are read from the current project and validated against the current project.<p>
     *
     * @param dbc the database context
     * @param publishList the publish list to validate, may be <code>null</code>
     * @param report a report to print messages, may be <code>null</code>
     *
     * @return a map with lists of invalid links
     *          (<code>{@link org.opencms.relations.CmsRelation}</code> objects)
     *          keyed by root paths
     *
     * @throws Exception if something goes wrong
     */
    public Map<String, List<CmsRelation>> validateResources(
        CmsDbContext dbc,
        CmsPublishList publishList,
        I_CmsReport report)
    throws Exception {

        // check if progress should be set in the thread
        A_CmsProgressThread thread = null;
        if (Thread.currentThread() instanceof A_CmsProgressThread) {
            thread = (A_CmsProgressThread)Thread.currentThread();
        }

        Map<String, List<CmsRelation>> invalidResources = new HashMap<String, List<CmsRelation>>();
        boolean interProject = (publishList != null);
        if (report != null) {
            report.println(
                Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_BEGIN_0),
                I_CmsReport.FORMAT_HEADLINE);
        }
        List<CmsResource> resources = new ArrayList<CmsResource>();
        if (publishList == null) {
            CmsResourceFilter filter = CmsResourceFilter.IGNORE_EXPIRATION;
            List<I_CmsResourceType> resTypes = OpenCms.getResourceManager().getResourceTypes();
            int typeCount = 0;
            for (I_CmsResourceType type : resTypes) {

                // set progress in thread (first 10 percent)
                typeCount++;
                updateProgress(thread, (typeCount * 10) / resTypes.size());

                if (type instanceof I_CmsLinkParseable) {
                    filter = filter.addRequireType(type.getTypeId());
                    try {
                        resources.addAll(
                            m_driverManager.readResources(
                                dbc,
                                m_driverManager.readResource(dbc, "/", filter),
                                filter,
                                true));
                    } catch (CmsException e) {
                        // resources of this type are skipped, validation continues with the next type
                        LOG.error(
                            Messages.get().getBundle().key(Messages.LOG_RETRIEVAL_RESOURCES_1, type.getTypeName()),
                            e);
                    }
                }
            }
        } else {
            resources.addAll(publishList.getAllResources());
        }

        // populate a lookup map with the project resources that
        // actually get published keyed by their resource names.
        // second, resources that don't get validated are ignored.
        Map<String, CmsResource> offlineFilesLookup = new HashMap<String, CmsResource>();
        int resourceCount = 0;
        for (CmsResource resource : resources) {

            // set progress in thread (next 10 percent)
            resourceCount++;
            updateProgress(thread, ((resourceCount * 10) / resources.size()) + 10);

            offlineFilesLookup.put(resource.getRootPath(), resource);
        }

        // with a publish list the links are checked against the online project,
        // otherwise against the current project
        CmsProject project = dbc.currentProject();
        if (interProject) {
            try {
                project = m_driverManager.readProject(dbc, CmsProject.ONLINE_PROJECT_ID);
            } catch (CmsException e) {
                // should never happen
                LOG.error(e.getLocalizedMessage(), e);
            }
        }

        boolean foundBrokenLinks = false;
        for (int index = 0, size = resources.size(); index < size; index++) {

            // set progress in thread (next 20 percent; leave rest for creating the list and the html)
            updateProgress(thread, ((index * 20) / size) + 20);

            CmsResource resource = resources.get(index);
            String resourceName = resource.getRootPath();

            if (report != null) {
                report.print(
                    org.opencms.report.Messages.get().container(
                        org.opencms.report.Messages.RPT_SUCCESSION_2,
                        Integer.valueOf(index + 1),
                        Integer.valueOf(size)),
                    I_CmsReport.FORMAT_NOTE);
                report.print(Messages.get().container(Messages.RPT_HTMLLINK_VALIDATING_0), I_CmsReport.FORMAT_NOTE);
                report.print(
                    org.opencms.report.Messages.get().container(
                        org.opencms.report.Messages.RPT_ARGUMENT_1,
                        dbc.removeSiteRoot(resourceName)));
                report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0));
            }
            List<CmsRelation> brokenLinks = validateLinks(dbc, resource, offlineFilesLookup, project, report);
            if (!brokenLinks.isEmpty()) {
                // the resource contains broken links
                invalidResources.put(resourceName, brokenLinks);
                foundBrokenLinks = true;
            } else if (report != null) {
                // the resource contains *NO* broken links
                report.println(
                    org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0),
                    I_CmsReport.FORMAT_OK);
            }
        }

        if (foundBrokenLinks && (report != null)) {
            // print a summary if we found broken links in the validated resources
            report.println(
                Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_ERROR_0),
                I_CmsReport.FORMAT_ERROR);
        }
        if (report != null) {
            report.println(
                Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_END_0),
                I_CmsReport.FORMAT_HEADLINE);
        }
        return invalidResources;
    }

    /**
     * Checks a link to a resource which has been deleted.<p>
     *
     * The link is considered valid only if the link source is part of the given file lookup
     * and is either deleted itself, or the given relation targets for the link source contain
     * neither the target path nor the target id of the relation.<p>
     *
     * @param relation the relation pointing to the deleted resource
     * @param link the URI of the resource which has a link to the deleted resource
     * @param fileLookup a lookup table of files to be published
     * @param relationTargets relation targets (target ids and target paths) keyed by the path of the link source
     *
     * @return true if the resource which has a link to the deleted resource is also going to be deleted,
     *          or if it has no entry for the relation target in the given relation targets
     */
    protected boolean checkLinkForDeletedLinkTarget(
        CmsRelation relation,
        String link,
        Map<String, CmsResource> fileLookup,
        HashMultimap<String, String> relationTargets) {

        boolean isValidLink = false;
        // since we are going to delete the resource
        // check if the linked resource is also to be deleted
        if (fileLookup.containsKey(link)) {
            CmsResource offlineResource = fileLookup.get(link);
            // a multimap returns an empty set for an unknown key, so no null check is required here
            Set<String> relationTargetsForLink = relationTargets.get(link);
            boolean hasNoRelations = !relationTargetsForLink.contains(relation.getTargetPath())
                && !relationTargetsForLink.contains(relation.getTargetId().toString());
            isValidLink = offlineResource.getState().isDeleted() || hasNoRelations;
        }
        return isValidLink;
    }

    /**
     * Checks a link from a resource which has changed.<p>
     *
     * The link target is first read by its structure id from the given project, then, if this
     * fails because the resource was not found, by its path. If the target can not be read at all,
     * the link is only valid if the target is contained in the file lookup. In any case the link is
     * invalid if the target is contained in the file lookup in deleted state.<p>
     *
     * @param dbc the current dbc
     * @param resource the link source (not used by this implementation)
     * @param relation the relation
     * @param link the link target
     * @param project the current project
     * @param fileLookup a lookup table which contains the files which are going to be published
     *
     * @return true if the link will be valid after publishing
     */
    protected boolean checkLinkForNewOrChangedLinkSource(
        CmsDbContext dbc,
        CmsResource resource,
        CmsRelation relation,
        String link,
        CmsProject project,
        Map<String, CmsResource> fileLookup) {

        boolean isValidLink = true;
        // the link is valid...
        try {
            // ... if the linked resource exists in the online project
            // search the target of link in the online project
            try {
                link = m_driverManager.getVfsDriver(dbc).readResource(
                    dbc,
                    project.getUuid(),
                    relation.getTargetId(),
                    true).getRootPath();
            } catch (CmsVfsResourceNotFoundException e) {
                // reading by id failed, this means that the link variable still equals relation.getTargetPath()
                if (LOG.isDebugEnabled()) {
                    LOG.debug(
                        Messages.get().getBundle().key(
                            Messages.LOG_LINK_VALIDATION_READBYID_FAILED_2,
                            relation.getTargetId().toString(),
                            project.getName()),
                        e);
                }
                // only the existence matters here, the returned resource is not needed
                m_driverManager.getVfsDriver(dbc).readResource(dbc, project.getUuid(), relation.getTargetPath(), true);
            }
        } catch (CmsException e) {
            // ... or if the linked resource is a resource that gets actually published
            if (LOG.isDebugEnabled()) {
                LOG.debug(
                    Messages.get().getBundle().key(
                        Messages.LOG_LINK_VALIDATION_READBYPATH_FAILED_2,
                        relation.getTargetPath(),
                        project.getName()),
                    e);
            }
            if (!fileLookup.containsKey(link)) {
                isValidLink = false;
            }
        } finally {
            // ... and if the linked resource to be published get deleted
            // (placed in the finally block so that it is checked no matter if reading the target succeeded)
            if (fileLookup.containsKey(link)) {
                CmsResource offlineResource = fileLookup.get(link);
                if (offlineResource.getState().isDeleted()) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_LINK_VALIDATION_RESOURCEDELETED_1, link));
                    }
                    isValidLink = false;
                }
            }
        }
        return isValidLink;
    }

    /**
     * Validates the links for the specified resource.<p>
     *
     * For a resource that is not deleted, the targets of its outgoing relations in the current
     * project are checked. For a deleted resource, the sources of its incoming relations in the
     * given project are checked.<p>
     *
     * @param dbc the database context
     * @param resource the resource that will be validated
     * @param fileLookup a map for faster lookup with all resources keyed by their rootpath
     * @param project the project to validate
     * @param report the report to write to, may be <code>null</code>
     *
     * @return a list with the broken links as {@link CmsRelation} objects for the specified resource,
     *          or an empty list if no broken links were found
     */
    protected List<CmsRelation> validateLinks(
        CmsDbContext dbc,
        CmsResource resource,
        Map<String, CmsResource> fileLookup,
        CmsProject project,
        I_CmsReport report) {

        List<CmsRelation> brokenRelations = new ArrayList<CmsRelation>();
        // remembers for every link already checked if it is broken (TRUE) or valid (FALSE)
        Map<String, Boolean> validatedLinks = new HashMap<String, Boolean>();
        boolean deleted = resource.getState().isDeleted();

        // get the relations
        List<CmsRelation> incomingRelationsOnline = new ArrayList<CmsRelation>();
        List<CmsRelation> outgoingRelationsOffline = new ArrayList<CmsRelation>();
        try {
            if (!deleted) {
                // search the target of links in the current (offline) project
                outgoingRelationsOffline = m_driverManager.getRelationsForResource(
                    dbc,
                    resource,
                    CmsRelationFilter.TARGETS);
            } else {
                // search the source of links in the online project
                CmsProject currentProject = dbc.currentProject();
                dbc.getRequestContext().setCurrentProject(project);
                try {
                    incomingRelationsOnline = m_driverManager.getRelationsForResource(
                        dbc,
                        resource,
                        CmsRelationFilter.SOURCES);
                } finally {
                    // always switch back to the project that was active before
                    dbc.getRequestContext().setCurrentProject(currentProject);
                }
            }
        } catch (CmsException e) {
            LOG.error(Messages.get().getBundle().key(Messages.LOG_LINK_SEARCH_1, resource), e);
            if (report != null) {
                report.println(
                    Messages.get().container(Messages.LOG_LINK_SEARCH_1, dbc.removeSiteRoot(resource.getRootPath())),
                    I_CmsReport.FORMAT_ERROR);
            }
            return brokenRelations;
        }

        List<CmsRelation> relations = new ArrayList<CmsRelation>();
        relations.addAll(incomingRelationsOnline);
        relations.addAll(outgoingRelationsOffline);

        // NOTE(review): this multimap is filled from the outgoing offline relations only, which are
        // not read for deleted resources, while it is only passed on below for deleted resources.
        // From this call site it is therefore always empty - confirm whether the offline relations
        // of the link sources were meant to be collected here.
        HashMultimap<String, String> outgoingRelationTargets = HashMultimap.create();
        for (CmsRelation outRelation : outgoingRelationsOffline) {
            String sourcePath = outRelation.getSourcePath();
            outgoingRelationTargets.put(sourcePath, outRelation.getTargetId().toString());
            outgoingRelationTargets.put(sourcePath, outRelation.getTargetPath());
        }

        // check the relations
        boolean first = true;
        for (CmsRelation relation : relations) {
            // for deleted resources the link sources are checked, otherwise the link targets
            String link = deleted ? relation.getSourcePath() : relation.getTargetPath();
            if (CmsStringUtil.isEmptyOrWhitespaceOnly(link)) {
                // skip empty links
                continue;
            }
            Boolean knownBroken = validatedLinks.get(link);
            if (knownBroken != null) {
                // skip already validated links
                if (knownBroken.booleanValue()) {
                    // add broken relation of different type
                    brokenRelations.add(relation);
                }
                continue;
            }
            boolean isValidLink;
            if (deleted) {
                isValidLink = checkLinkForDeletedLinkTarget(relation, link, fileLookup, outgoingRelationTargets);
            } else {
                isValidLink = checkLinkForNewOrChangedLinkSource(dbc, resource, relation, link, project, fileLookup);
            }
            if (!isValidLink) {
                if (first) {
                    // print the headline only once per resource
                    if (report != null) {
                        report.println(
                            Messages.get().container(Messages.RPT_HTMLLINK_FOUND_BROKEN_LINKS_0),
                            I_CmsReport.FORMAT_WARNING);
                    }
                    first = false;
                }
                brokenRelations.add(relation);
                if (report != null) {
                    if (!deleted) {
                        report.println(
                            Messages.get().container(
                                Messages.RPT_HTMLLINK_BROKEN_TARGET_2,
                                relation.getSourcePath(),
                                dbc.removeSiteRoot(link)),
                            I_CmsReport.FORMAT_WARNING);
                    } else {
                        report.println(
                            Messages.get().container(
                                Messages.RPT_HTMLLINK_BROKEN_SOURCE_2,
                                dbc.removeSiteRoot(link),
                                relation.getTargetPath()),
                            I_CmsReport.FORMAT_WARNING);
                    }
                }
            }
            validatedLinks.put(link, Boolean.valueOf(!isValidLink));
        }
        return brokenRelations;
    }

}