001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.relations;
029
030import org.opencms.db.CmsDbContext;
031import org.opencms.db.CmsDriverManager;
032import org.opencms.db.CmsPublishList;
033import org.opencms.file.CmsProject;
034import org.opencms.file.CmsResource;
035import org.opencms.file.CmsResourceFilter;
036import org.opencms.file.CmsVfsResourceNotFoundException;
037import org.opencms.file.types.I_CmsResourceType;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsIllegalStateException;
040import org.opencms.main.CmsLog;
041import org.opencms.main.OpenCms;
042import org.opencms.report.I_CmsReport;
043import org.opencms.util.CmsStringUtil;
044import org.opencms.workplace.threads.A_CmsProgressThread;
045
046import java.util.ArrayList;
047import java.util.HashMap;
048import java.util.Iterator;
049import java.util.List;
050import java.util.Map;
051
052import org.apache.commons.logging.Log;
053
054/**
055 * Validates relations of resources in the OpenCms VFS.<p>
056 *
057 * Relations are, for instance, href attribs in anchor tags and src attribs in
058 * image tags, as well as OpenCmsVfsFile values in Xml Content.<p>
059 *
060 * External links to targets outside the OpenCms VFS don't get validated.<p>
061 *
062 * Objects using this class are responsible to handle detected broken links.<p>
063 *
064 * @since 6.3.0
065 */
066public class CmsRelationSystemValidator {
067
068    /** The log object for this class. */
069    private static final Log LOG = CmsLog.getLog(CmsRelationSystemValidator.class);
070
071    /** The driver manager. */
072    protected CmsDriverManager m_driverManager;
073
074    /**
075     * Default constructor.<p>
076     *
077     * @param driverManager The Cms driver manager
078     */
079    public CmsRelationSystemValidator(CmsDriverManager driverManager) {
080
081        m_driverManager = driverManager;
082    }
083
084    /**
085     * Validates the relations against the online project.<p>
086     *
087     * The result is printed to the given report.<p>
088     *
089     * Validating references means to answer the question, whether
090     * we would have broken links in the online project if the given
091     * publish list would get published.<p>
092     *
093     * @param dbc the database context
094     * @param publishList the publish list to validate
095     * @param report a report to print messages
096     *
097     * @return a map with lists of invalid links
098     *          (<code>{@link org.opencms.relations.CmsRelation}}</code> objects)
099     *          keyed by root paths
100     *
101     * @throws Exception if something goes wrong
102     */
103    public Map<String, List<CmsRelation>> validateResources(
104        CmsDbContext dbc,
105        CmsPublishList publishList,
106        I_CmsReport report)
107    throws Exception {
108
109        // check if progress should be set in the thread
110        A_CmsProgressThread thread = null;
111        if (Thread.currentThread() instanceof A_CmsProgressThread) {
112            thread = (A_CmsProgressThread)Thread.currentThread();
113        }
114
115        Map<String, List<CmsRelation>> invalidResources = new HashMap<String, List<CmsRelation>>();
116        boolean interProject = (publishList != null);
117        if (report != null) {
118            report.println(
119                Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_BEGIN_0),
120                I_CmsReport.FORMAT_HEADLINE);
121        }
122        List<CmsResource> resources = new ArrayList<CmsResource>();
123        if (publishList == null) {
124            CmsResourceFilter filter = CmsResourceFilter.IGNORE_EXPIRATION;
125            List<I_CmsResourceType> resTypes = OpenCms.getResourceManager().getResourceTypes();
126            Iterator<I_CmsResourceType> itTypes = resTypes.iterator();
127            int count = 0;
128            while (itTypes.hasNext()) {
129
130                // set progress in thread (first 10 percent)
131                count++;
132                if (thread != null) {
133
134                    if (thread.isInterrupted()) {
135                        throw new CmsIllegalStateException(
136                            org.opencms.workplace.commons.Messages.get().container(
137                                org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0));
138                    }
139                    thread.setProgress((count * 10) / resTypes.size());
140                }
141
142                I_CmsResourceType type = itTypes.next();
143                if (type instanceof I_CmsLinkParseable) {
144                    filter = filter.addRequireType(type.getTypeId());
145                    try {
146                        resources.addAll(
147                            m_driverManager.readResources(
148                                dbc,
149                                m_driverManager.readResource(dbc, "/", filter),
150                                filter,
151                                true));
152                    } catch (CmsException e) {
153                        LOG.error(
154                            Messages.get().getBundle().key(Messages.LOG_RETRIEVAL_RESOURCES_1, type.getTypeName()),
155                            e);
156                    }
157                }
158            }
159        } else {
160            resources.addAll(publishList.getAllResources());
161        }
162
163        // populate a lookup map with the project resources that
164        // actually get published keyed by their resource names.
165        // second, resources that don't get validated are ignored.
166        Map<String, CmsResource> offlineFilesLookup = new HashMap<String, CmsResource>();
167        Iterator<CmsResource> itResources = resources.iterator();
168        int count = 0;
169        while (itResources.hasNext()) {
170
171            // set progress in thread (next 10 percent)
172            count++;
173            if (thread != null) {
174
175                if (thread.isInterrupted()) {
176                    throw new CmsIllegalStateException(
177                        org.opencms.workplace.commons.Messages.get().container(
178                            org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0));
179                }
180                thread.setProgress(((count * 10) / resources.size()) + 10);
181            }
182
183            CmsResource resource = itResources.next();
184            offlineFilesLookup.put(resource.getRootPath(), resource);
185            offlineFilesLookup.put(resource.getStructureId().toString(), resource);
186        }
187        CmsProject project = dbc.currentProject();
188        if (interProject) {
189            try {
190                project = m_driverManager.readProject(dbc, CmsProject.ONLINE_PROJECT_ID);
191            } catch (CmsException e) {
192                // should never happen
193                LOG.error(e.getLocalizedMessage(), e);
194            }
195        }
196
197        boolean foundBrokenLinks = false;
198        for (int index = 0, size = resources.size(); index < size; index++) {
199
200            // set progress in thread (next 20 percent; leave rest for creating the list and the html)
201            if (thread != null) {
202
203                if (thread.isInterrupted()) {
204                    throw new CmsIllegalStateException(
205                        org.opencms.workplace.commons.Messages.get().container(
206                            org.opencms.workplace.commons.Messages.ERR_PROGRESS_INTERRUPTED_0));
207                }
208                thread.setProgress(((index * 20) / resources.size()) + 20);
209            }
210
211            CmsResource resource = resources.get(index);
212            String resourceName = resource.getRootPath();
213
214            if (report != null) {
215                report.print(
216                    org.opencms.report.Messages.get().container(
217                        org.opencms.report.Messages.RPT_SUCCESSION_2,
218                        Integer.valueOf(index + 1),
219                        Integer.valueOf(size)),
220                    I_CmsReport.FORMAT_NOTE);
221                report.print(Messages.get().container(Messages.RPT_HTMLLINK_VALIDATING_0), I_CmsReport.FORMAT_NOTE);
222                report.print(
223                    org.opencms.report.Messages.get().container(
224                        org.opencms.report.Messages.RPT_ARGUMENT_1,
225                        dbc.removeSiteRoot(resourceName)));
226                report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0));
227            }
228            List<CmsRelation> brokenLinks = validateLinks(dbc, resource, offlineFilesLookup, project, report);
229            if (brokenLinks.size() > 0) {
230                // the resource contains broken links
231                invalidResources.put(resourceName, brokenLinks);
232                foundBrokenLinks = true;
233            } else {
234                // the resource contains *NO* broken links
235                if (report != null) {
236                    report.println(
237                        org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0),
238                        I_CmsReport.FORMAT_OK);
239                }
240            }
241        }
242
243        if (foundBrokenLinks) {
244            // print a summary if we found broken links in the validated resources
245            if (report != null) {
246                report.println(
247                    Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_ERROR_0),
248                    I_CmsReport.FORMAT_ERROR);
249            }
250        }
251        if (report != null) {
252            report.println(
253                Messages.get().container(Messages.RPT_HTMLLINK_VALIDATOR_END_0),
254                I_CmsReport.FORMAT_HEADLINE);
255        }
256        return invalidResources;
257    }
258
259    /**
260     * Checks a link to a resource which has been deleted.<p>
261     * @param relation
262     *
263     * @param link the URI of the resource which has a link to the deleted resource
264     * @param fileLookup a lookup table of files to be published
265     * @param relationTargets
266     *
267     * @return true if the resource which has a link to the deleted resource is also going to be deleted
268     */
269    protected boolean checkLinkForDeletedLinkTarget(
270        CmsRelation relation,
271        String link,
272        Map<String, CmsResource> fileLookup) {
273
274        boolean isValidLink = false;
275        // since we are going to delete the resource
276        // check if the linked resource is also to be deleted
277        if (fileLookup.containsKey(link) || fileLookup.containsKey(relation.getSourceId().toString())) {
278            // Technically, if the relation source is going to be published too and is not deleted, the link is not valid. But in that case, validateLinks will also be called for that resource and detect broken the broken link there.
279            isValidLink = true;
280        }
281        return isValidLink;
282    }
283
284    /**
285     * Checks a link from a resource which has changed.<p>
286     *
287     * @param dbc the current dbc
288     * @param resource the link source
289     * @param relation the relation
290     * @param link the link target
291     * @param project the current project
292     * @param fileLookup a lookup table which contains the files which are going to be published
293     *
294     * @return true if the link will be valid after publishing
295     */
296    protected boolean checkLinkForNewOrChangedLinkSource(
297        CmsDbContext dbc,
298        CmsResource resource,
299        CmsRelation relation,
300        String link,
301        CmsProject project,
302        Map<String, CmsResource> fileLookup) {
303
304        boolean isValidLink = true;
305        // the link is valid...
306        try {
307            // ... if the linked resource exists in the online project
308            // search the target of link in the online project
309            try {
310                link = m_driverManager.getVfsDriver(
311                    dbc).readResource(dbc, project.getUuid(), relation.getTargetId(), true).getRootPath();
312            } catch (CmsVfsResourceNotFoundException e) {
313                // reading by id failed, this means that the link variable still equals relation.getTargetPath()
314                if (LOG.isDebugEnabled()) {
315                    LOG.debug(
316                        Messages.get().getBundle().key(
317                            Messages.LOG_LINK_VALIDATION_READBYID_FAILED_2,
318                            relation.getTargetId().toString(),
319                            project.getName()),
320                        e);
321                }
322                m_driverManager.getVfsDriver(dbc).readResource(dbc, project.getUuid(), relation.getTargetPath(), true);
323            }
324        } catch (CmsException e) {
325            // ... or if the linked resource is a resource that gets actually published
326            if (LOG.isDebugEnabled()) {
327                LOG.debug(
328                    Messages.get().getBundle().key(
329                        Messages.LOG_LINK_VALIDATION_READBYPATH_FAILED_2,
330                        relation.getTargetPath(),
331                        project.getName()),
332                    e);
333            }
334            if (!fileLookup.containsKey(link)) {
335                isValidLink = false;
336            }
337        } finally {
338            // ... and if the linked resource to be published get deleted
339            if (fileLookup.containsKey(link)) {
340                CmsResource offlineResource = fileLookup.get(link);
341                if (offlineResource.getState().isDeleted()) {
342                    if (LOG.isDebugEnabled()) {
343                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_LINK_VALIDATION_RESOURCEDELETED_1, link));
344                    }
345                    isValidLink = false;
346                }
347            }
348        }
349        return isValidLink;
350    }
351
352    /**
353     * Validates the links for the specified resource.<p>
354     *
355     * @param dbc the database context
356     * @param resource the resource that will be validated
357     * @param fileLookup a map for faster lookup with all resources keyed by their rootpath
358     * @param project the project to validate
359     * @param report the report to write to
360     *
361     * @return a list with the broken links as {@link CmsRelation} objects for the specified resource,
362     *          or an empty list if no broken links were found
363     */
364    protected List<CmsRelation> validateLinks(
365        CmsDbContext dbc,
366        CmsResource resource,
367        Map<String, CmsResource> fileLookup,
368        CmsProject project,
369        I_CmsReport report) {
370
371        List<CmsRelation> brokenRelations = new ArrayList<CmsRelation>();
372        Map<String, Boolean> validatedLinks = new HashMap<String, Boolean>();
373
374        // get the relations
375        List<CmsRelation> incomingRelationsOnline = new ArrayList<CmsRelation>();
376        List<CmsRelation> outgoingRelationsOffline = new ArrayList<CmsRelation>();
377        try {
378            if (!resource.getState().isDeleted()) {
379                // search the target of links in the current (offline) project
380                outgoingRelationsOffline = m_driverManager.getRelationsForResource(
381                    dbc,
382                    resource,
383                    CmsRelationFilter.TARGETS);
384            } else {
385                // search the source of links in the online project
386                CmsProject currentProject = dbc.currentProject();
387                dbc.getRequestContext().setCurrentProject(project);
388                try {
389                    incomingRelationsOnline = m_driverManager.getRelationsForResource(
390                        dbc,
391                        resource,
392                        CmsRelationFilter.SOURCES);
393                } finally {
394                    dbc.getRequestContext().setCurrentProject(currentProject);
395                }
396            }
397        } catch (CmsException e) {
398            LOG.error(Messages.get().getBundle().key(Messages.LOG_LINK_SEARCH_1, resource), e);
399            if (report != null) {
400                report.println(
401                    Messages.get().container(Messages.LOG_LINK_SEARCH_1, dbc.removeSiteRoot(resource.getRootPath())),
402                    I_CmsReport.FORMAT_ERROR);
403            }
404            return brokenRelations;
405        }
406
407        List<CmsRelation> relations = new ArrayList<CmsRelation>();
408        relations.addAll(incomingRelationsOnline);
409        relations.addAll(outgoingRelationsOffline);
410        // check the relations
411        boolean first = true;
412        Iterator<CmsRelation> itRelations = relations.iterator();
413        while (itRelations.hasNext()) {
414            CmsRelation relation = itRelations.next();
415            String link;
416            if (!resource.getState().isDeleted()) {
417                link = relation.getTargetPath();
418            } else {
419                link = relation.getSourcePath();
420            }
421            if (CmsStringUtil.isEmptyOrWhitespaceOnly(link)) {
422                // skip empty links
423                continue;
424            }
425            if (validatedLinks.keySet().contains(link)) {
426                // skip already validated links
427                if (validatedLinks.get(link).booleanValue()) {
428                    // add broken relation of different type
429                    brokenRelations.add(relation);
430                }
431                continue;
432            }
433            boolean result;
434            if (resource.getState().isDeleted()) {
435                result = checkLinkForDeletedLinkTarget(relation, link, fileLookup);
436            } else {
437                result = checkLinkForNewOrChangedLinkSource(dbc, resource, relation, link, project, fileLookup);
438
439            }
440            boolean isValidLink = result;
441            if (!isValidLink) {
442                if (first) {
443                    if (report != null) {
444                        report.println(
445                            Messages.get().container(Messages.RPT_HTMLLINK_FOUND_BROKEN_LINKS_0),
446                            I_CmsReport.FORMAT_WARNING);
447                    }
448                    first = false;
449                }
450                brokenRelations.add(relation);
451                if (report != null) {
452                    if (!resource.getState().isDeleted()) {
453                        report.println(
454                            Messages.get().container(
455                                Messages.RPT_HTMLLINK_BROKEN_TARGET_2,
456                                relation.getSourcePath(),
457                                dbc.removeSiteRoot(link)),
458                            I_CmsReport.FORMAT_WARNING);
459                    } else {
460                        report.println(
461                            Messages.get().container(
462                                Messages.RPT_HTMLLINK_BROKEN_SOURCE_2,
463                                dbc.removeSiteRoot(link),
464                                relation.getTargetPath()),
465                            I_CmsReport.FORMAT_WARNING);
466                    }
467                }
468            }
469            validatedLinks.put(link, Boolean.valueOf(!isValidLink));
470        }
471        return brokenRelations;
472    }
473
474}