001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.relations;
029
030import org.opencms.db.CmsDbContext;
031import org.opencms.db.CmsDriverManager;
032import org.opencms.db.CmsPublishList;
033import org.opencms.file.CmsProject;
034import org.opencms.file.CmsResource;
035import org.opencms.file.CmsResourceFilter;
036import org.opencms.file.CmsVfsResourceNotFoundException;
037import org.opencms.file.types.I_CmsResourceType;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsIllegalStateException;
040import org.opencms.main.CmsLog;
041import org.opencms.main.OpenCms;
042import org.opencms.report.I_CmsReport;
043import org.opencms.util.CmsStringUtil;
044import org.opencms.workplace.threads.A_CmsProgressThread;
045
046import java.util.ArrayList;
047import java.util.HashMap;
048import java.util.Iterator;
049import java.util.List;
050import java.util.Map;
051import java.util.Set;
052
053import org.apache.commons.logging.Log;
054
055import com.google.common.collect.HashMultimap;
056
057/**
058 * Validates relations of resources in the OpenCms VFS.<p>
059 *
060 * Relations are, for instance, href attribs in anchor tags and src attribs in
061 * image tags, as well as OpenCmsVfsFile values in Xml Content.<p>
062 *
063 * External links to targets outside the OpenCms VFS don't get validated.<p>
064 *
065 * Objects using this class are responsible to handle detected broken links.<p>
066 *
067 * @since 6.3.0
068 */
069public class CmsRelationSystemValidator {
070
071    /** The log object for this class. */
072    private static final Log LOG = CmsLog.getLog(CmsRelationSystemValidator.class);
073
074    /** The driver manager. */
075    protected CmsDriverManager m_driverManager;
076
077    /**
078     * Default constructor.<p>
079     *
080     * @param driverManager The Cms driver manager
081     */
082    public CmsRelationSystemValidator(CmsDriverManager driverManager) {
083
084        m_driverManager = driverManager;
085    }
086
087    /**
088     * Validates the relations against the online project.<p>
089     *
090     * The result is printed to the given report.<p>
091     *
092     * Validating references means to answer the question, whether
093     * we would have broken links in the online project if the given
094     * publish list would get published.<p>
095     *
096     * @param dbc the database context
097     * @param publishList the publish list to validate
098     * @param report a report to print messages
099     *
100     * @return a map with lists of invalid links
101     *          (<code>{@link org.opencms.relations.CmsRelation}}</code> objects)
102     *          keyed by root paths
103     *
104     * @throws Exception if something goes wrong
105     */
106    public Map<String, List<CmsRelation>> validateResources(
107        CmsDbContext dbc,
108        CmsPublishList publishList,
109        I_CmsReport report) throws Exception {
110
111        // check if progress should be set in the thread
112        A_CmsProgressThread thread = null;
113        if (Thread.currentThread() instanceof A_CmsProgressThread) {
114            thread = (A_CmsProgressThread)Thread.currentThread();
115        }
116
117        Map<String, List<CmsRelation>> invalidResources = new HashMap<String, List<CmsRelation>>();
118        boolean interProject = (publishList != null);
119        if (report != null) {
120            report.println(
121                Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_BEGIN_0),
122                I_CmsReport.FORMAT_HEADLINE);
123        }
124        List<CmsResource> resources = new ArrayList<CmsResource>();
125        if (publishList == null) {
126            CmsResourceFilter filter = CmsResourceFilter.IGNORE_EXPIRATION;
127            List<I_CmsResourceType> resTypes = OpenCms.getResourceManager().getResourceTypes();
128            Iterator<I_CmsResourceType> itTypes = resTypes.iterator();
129            int count = 0;
130            while (itTypes.hasNext()) {
131
132                // set progress in thread (first 10 percent)
133                count++;
134                if (thread != null) {
135
136                    if (thread.isInterrupted()) {
137                        throw new CmsIllegalStateException(
138                            org.opencms.workplace.commons.Messages.get().container(
139                                org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0));
140                    }
141                    thread.setProgress((count * 10) / resTypes.size());
142                }
143
144                I_CmsResourceType type = itTypes.next();
145                if (type instanceof I_CmsLinkParseable) {
146                    filter = filter.addRequireType(type.getTypeId());
147                    try {
148                        resources.addAll(
149                            m_driverManager.readResources(
150                                dbc,
151                                m_driverManager.readResource(dbc, "/", filter),
152                                filter,
153                                true));
154                    } catch (CmsException e) {
155                        LOG.error(
156                            Messages.get().getBundle().key(Messages.LOG_RETRIEVAL_RESOURCES_1, type.getTypeName()),
157                            e);
158                    }
159                }
160            }
161        } else {
162            resources.addAll(publishList.getAllResources());
163        }
164
165        // populate a lookup map with the project resources that
166        // actually get published keyed by their resource names.
167        // second, resources that don't get validated are ignored.
168        Map<String, CmsResource> offlineFilesLookup = new HashMap<String, CmsResource>();
169        Iterator<CmsResource> itResources = resources.iterator();
170        int count = 0;
171        while (itResources.hasNext()) {
172
173            // set progress in thread (next 10 percent)
174            count++;
175            if (thread != null) {
176
177                if (thread.isInterrupted()) {
178                    throw new CmsIllegalStateException(
179                        org.opencms.workplace.commons.Messages.get().container(
180                            org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0));
181                }
182                thread.setProgress(((count * 10) / resources.size()) + 10);
183            }
184
185            CmsResource resource = itResources.next();
186            offlineFilesLookup.put(resource.getRootPath(), resource);
187        }
188        CmsProject project = dbc.currentProject();
189        if (interProject) {
190            try {
191                project = m_driverManager.readProject(dbc, CmsProject.ONLINE_PROJECT_ID);
192            } catch (CmsException e) {
193                // should never happen
194                LOG.error(e.getLocalizedMessage(), e);
195            }
196        }
197
198        boolean foundBrokenLinks = false;
199        for (int index = 0, size = resources.size(); index < size; index++) {
200
201            // set progress in thread (next 20 percent; leave rest for creating the list and the html)
202            if (thread != null) {
203
204                if (thread.isInterrupted()) {
205                    throw new CmsIllegalStateException(
206                        org.opencms.workplace.commons.Messages.get().container(
207                            org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0));
208                }
209                thread.setProgress(((index * 20) / resources.size()) + 20);
210            }
211
212            CmsResource resource = resources.get(index);
213            String resourceName = resource.getRootPath();
214
215            if (report != null) {
216                report.print(
217                    org.opencms.report.Messages.get().container(
218                        org.opencms.report.Messages.RPT_SUCCESSION_2,
219                        Integer.valueOf(index + 1),
220                        Integer.valueOf(size)),
221                    I_CmsReport.FORMAT_NOTE);
222                report.print(Messages.get().container(Messages.RPT_HTMLLINK_VALIDATING_0), I_CmsReport.FORMAT_NOTE);
223                report.print(
224                    org.opencms.report.Messages.get().container(
225                        org.opencms.report.Messages.RPT_ARGUMENT_1,
226                        dbc.removeSiteRoot(resourceName)));
227                report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0));
228            }
229            List<CmsRelation> brokenLinks = validateLinks(dbc, resource, offlineFilesLookup, project, report);
230            if (brokenLinks.size() > 0) {
231                // the resource contains broken links
232                invalidResources.put(resourceName, brokenLinks);
233                foundBrokenLinks = true;
234            } else {
235                // the resource contains *NO* broken links
236                if (report != null) {
237                    report.println(
238                        org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0),
239                        I_CmsReport.FORMAT_OK);
240                }
241            }
242        }
243
244        if (foundBrokenLinks) {
245            // print a summary if we found broken links in the validated resources
246            if (report != null) {
247                report.println(
248                    Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_ERROR_0),
249                    I_CmsReport.FORMAT_ERROR);
250            }
251        }
252        if (report != null) {
253            report.println(
254                Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_END_0),
255                I_CmsReport.FORMAT_HEADLINE);
256        }
257        return invalidResources;
258    }
259
260    /**
261     * Checks a link to a resource which has been deleted.<p>
262     * @param relation
263     *
264     * @param link the URI of the resource which has a link to the deleted resource
265     * @param fileLookup a lookup table of files to be published
266     * @param relationTargets
267     *
268     * @return true if the resource which has a link to the deleted resource is also going to be deleted
269     */
270    protected boolean checkLinkForDeletedLinkTarget(
271        CmsRelation relation,
272        String link,
273        Map<String, CmsResource> fileLookup,
274        HashMultimap<String, String> relationTargets) {
275
276        boolean isValidLink = false;
277        // since we are going to delete the resource
278        // check if the linked resource is also to be deleted
279        if (fileLookup.containsKey(link)) {
280            CmsResource offlineResource = fileLookup.get(link);
281            Set<String> relationTargetsForLink = relationTargets.get(link);
282            boolean hasNoRelations = !relationTargetsForLink.contains(relation.getTargetPath())
283                && !relationTargetsForLink.contains(relation.getTargetId().toString());
284            isValidLink = offlineResource.getState().isDeleted() || hasNoRelations;
285        }
286        return isValidLink;
287    }
288
289    /**
290     * Checks a link from a resource which has changed.<p>
291     *
292     * @param dbc the current dbc
293     * @param resource the link source
294     * @param relation the relation
295     * @param link the link target
296     * @param project the current project
297     * @param fileLookup a lookup table which contains the files which are going to be published
298     *
299     * @return true if the link will be valid after publishing
300     */
301    protected boolean checkLinkForNewOrChangedLinkSource(
302        CmsDbContext dbc,
303        CmsResource resource,
304        CmsRelation relation,
305        String link,
306        CmsProject project,
307        Map<String, CmsResource> fileLookup) {
308
309        boolean isValidLink = true;
310        // the link is valid...
311        try {
312            // ... if the linked resource exists in the online project
313            // search the target of link in the online project
314            try {
315                link = m_driverManager.getVfsDriver(dbc).readResource(
316                    dbc,
317                    project.getUuid(),
318                    relation.getTargetId(),
319                    true).getRootPath();
320            } catch (CmsVfsResourceNotFoundException e) {
321                // reading by id failed, this means that the link variable still equals relation.getTargetPath()
322                if (LOG.isDebugEnabled()) {
323                    LOG.debug(
324                        Messages.get().getBundle().key(
325                            Messages.LOG_LINK_VALIDATION_READBYID_FAILED_2,
326                            relation.getTargetId().toString(),
327                            project.getName()),
328                        e);
329                }
330                m_driverManager.getVfsDriver(dbc).readResource(dbc, project.getUuid(), relation.getTargetPath(), true);
331            }
332        } catch (CmsException e) {
333            // ... or if the linked resource is a resource that gets actually published
334            if (LOG.isDebugEnabled()) {
335                LOG.debug(
336                    Messages.get().getBundle().key(
337                        Messages.LOG_LINK_VALIDATION_READBYPATH_FAILED_2,
338                        relation.getTargetPath(),
339                        project.getName()),
340                    e);
341            }
342            if (!fileLookup.containsKey(link)) {
343                isValidLink = false;
344            }
345        } finally {
346            // ... and if the linked resource to be published get deleted
347            if (fileLookup.containsKey(link)) {
348                CmsResource offlineResource = fileLookup.get(link);
349                if (offlineResource.getState().isDeleted()) {
350                    if (LOG.isDebugEnabled()) {
351                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_LINK_VALIDATION_RESOURCEDELETED_1, link));
352                    }
353                    isValidLink = false;
354                }
355            }
356        }
357        return isValidLink;
358    }
359
360    /**
361     * Validates the links for the specified resource.<p>
362     *
363     * @param dbc the database context
364     * @param resource the resource that will be validated
365     * @param fileLookup a map for faster lookup with all resources keyed by their rootpath
366     * @param project the project to validate
367     * @param report the report to write to
368     *
369     * @return a list with the broken links as {@link CmsRelation} objects for the specified resource,
370     *          or an empty list if no broken links were found
371     */
372    protected List<CmsRelation> validateLinks(
373        CmsDbContext dbc,
374        CmsResource resource,
375        Map<String, CmsResource> fileLookup,
376        CmsProject project,
377        I_CmsReport report) {
378
379        List<CmsRelation> brokenRelations = new ArrayList<CmsRelation>();
380        Map<String, Boolean> validatedLinks = new HashMap<String, Boolean>();
381
382        // get the relations
383        List<CmsRelation> incomingRelationsOnline = new ArrayList<CmsRelation>();
384        List<CmsRelation> outgoingRelationsOffline = new ArrayList<CmsRelation>();
385        try {
386            if (!resource.getState().isDeleted()) {
387                // search the target of links in the current (offline) project
388                outgoingRelationsOffline = m_driverManager.getRelationsForResource(
389                    dbc,
390                    resource,
391                    CmsRelationFilter.TARGETS);
392            } else {
393                // search the source of links in the online project
394                CmsProject currentProject = dbc.currentProject();
395                dbc.getRequestContext().setCurrentProject(project);
396                try {
397                    incomingRelationsOnline = m_driverManager.getRelationsForResource(
398                        dbc,
399                        resource,
400                        CmsRelationFilter.SOURCES);
401                } finally {
402                    dbc.getRequestContext().setCurrentProject(currentProject);
403                }
404            }
405        } catch (CmsException e) {
406            LOG.error(Messages.get().getBundle().key(Messages.LOG_LINK_SEARCH_1, resource), e);
407            if (report != null) {
408                report.println(
409                    Messages.get().container(Messages.LOG_LINK_SEARCH_1, dbc.removeSiteRoot(resource.getRootPath())),
410                    I_CmsReport.FORMAT_ERROR);
411            }
412            return brokenRelations;
413        }
414
415        List<CmsRelation> relations = new ArrayList<CmsRelation>();
416        relations.addAll(incomingRelationsOnline);
417        relations.addAll(outgoingRelationsOffline);
418        HashMultimap<String, String> outgoingRelationTargets = HashMultimap.create();
419        for (CmsRelation outRelation : outgoingRelationsOffline) {
420            String sourcePath = outRelation.getSourcePath();
421            String targetId = outRelation.getTargetId().toString();
422            String targetPath = outRelation.getTargetPath();
423            outgoingRelationTargets.put(sourcePath, targetId);
424            outgoingRelationTargets.put(sourcePath, targetPath);
425        }
426        // check the relations
427        boolean first = true;
428        Iterator<CmsRelation> itRelations = relations.iterator();
429        while (itRelations.hasNext()) {
430            CmsRelation relation = itRelations.next();
431            String link;
432            if (!resource.getState().isDeleted()) {
433                link = relation.getTargetPath();
434            } else {
435                link = relation.getSourcePath();
436            }
437            if (CmsStringUtil.isEmptyOrWhitespaceOnly(link)) {
438                // skip empty links
439                continue;
440            }
441            if (validatedLinks.keySet().contains(link)) {
442                // skip already validated links
443                if (validatedLinks.get(link).booleanValue()) {
444                    // add broken relation of different type
445                    brokenRelations.add(relation);
446                }
447                continue;
448            }
449            boolean result;
450            if (resource.getState().isDeleted()) {
451                result = checkLinkForDeletedLinkTarget(relation, link, fileLookup, outgoingRelationTargets);
452            } else {
453                result = checkLinkForNewOrChangedLinkSource(dbc, resource, relation, link, project, fileLookup);
454
455            }
456            boolean isValidLink = result;
457            if (!isValidLink) {
458                if (first) {
459                    if (report != null) {
460                        report.println(
461                            Messages.get().container(Messages.RPT_HTMLLINK_FOUND_BROKEN_LINKS_0),
462                            I_CmsReport.FORMAT_WARNING);
463                    }
464                    first = false;
465                }
466                brokenRelations.add(relation);
467                if (report != null) {
468                    if (!resource.getState().isDeleted()) {
469                        report.println(
470                            Messages.get().container(
471                                Messages.RPT_HTMLLINK_BROKEN_TARGET_2,
472                                relation.getSourcePath(),
473                                dbc.removeSiteRoot(link)),
474                            I_CmsReport.FORMAT_WARNING);
475                    } else {
476                        report.println(
477                            Messages.get().container(
478                                Messages.RPT_HTMLLINK_BROKEN_SOURCE_2,
479                                dbc.removeSiteRoot(link),
480                                relation.getTargetPath()),
481                            I_CmsReport.FORMAT_WARNING);
482                    }
483                }
484            }
485            validatedLinks.put(link, Boolean.valueOf(!isValidLink));
486        }
487        return brokenRelations;
488    }
489
490}