001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.relations;
029
030import org.opencms.file.CmsFile;
031import org.opencms.file.CmsObject;
032import org.opencms.file.CmsResource;
033import org.opencms.file.CmsResourceFilter;
034import org.opencms.file.types.CmsResourceTypePointer;
035import org.opencms.main.CmsException;
036import org.opencms.main.OpenCms;
037import org.opencms.report.CmsLogReport;
038import org.opencms.report.I_CmsReport;
039import org.opencms.scheduler.I_CmsScheduledJob;
040import org.opencms.util.CmsUriSplitter;
041
042import java.net.HttpURLConnection;
043import java.net.MalformedURLException;
044import java.net.URI;
045import java.net.URISyntaxException;
046import java.net.URL;
047import java.util.HashMap;
048import java.util.Iterator;
049import java.util.List;
050import java.util.Map;
051
052/**
053 * Class to validate pointer links.<p>
054 *
055 * @since 6.0.0
056 */
057public class CmsExternalLinksValidator implements I_CmsScheduledJob {
058
059    /** The report for the output. */
060    private I_CmsReport m_report;
061
062    /**
063     * Checks if the given url is valid.<p>
064     *
065     * @param check the url to check
066     * @param cms a OpenCms context object
067     *
068     * @return false if the url could not be accessed
069     */
070    public static boolean checkUrl(CmsObject cms, String check) {
071
072        // first, create an URI from the string representation
073        URI uri = null;
074        try {
075            uri = new CmsUriSplitter(check, true).toURI();
076        } catch (URISyntaxException exc) {
077            return false;
078        }
079        try {
080            if (!uri.isAbsolute()) {
081                return cms.existsResource(cms.getRequestContext().removeSiteRoot(uri.getPath()));
082            } else {
083                URL url = uri.toURL();
084                if ("http".equals(url.getProtocol())) {
085                    // ensure that file is encoded properly
086                    HttpURLConnection httpcon = (HttpURLConnection)url.openConnection();
087                    int responseCode = httpcon.getResponseCode();
088                    // accepting all status codes 2xx success and 3xx - redirect
089                    return ((responseCode >= 200) && (responseCode < 400));
090                } else {
091                    return true;
092                }
093            }
094        } catch (MalformedURLException mue) {
095            return false;
096        } catch (Exception ex) {
097            return false;
098        }
099    }
100
101    /**
102     * This method is called by the cron scheduler.<p>
103     *
104     * @param cms a OpenCms context object
105     * @param parameters link check parameters
106     * @return the String that is written to the OpenCms log
107     * @throws CmsException if something goes wrong
108     */
109    public String launch(CmsObject cms, Map<String, String> parameters) throws CmsException {
110
111        if (Boolean.valueOf(parameters.get("writeLog")).booleanValue()) {
112            m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class);
113        }
114        validateLinks(cms);
115        return "CmsExternLinkValidator.launch(): Links checked.";
116    }
117
118    /**
119     * Sets the report for the output.<p>
120     *
121     * @param report the report for the output
122     */
123    public void setReport(I_CmsReport report) {
124
125        m_report = report;
126    }
127
128    /**
129     * Validate all links.<p>
130     *
131     * @param cms a OpenCms context object
132     *
133     * @throws CmsException if something goes wrong
134     */
135    @SuppressWarnings("deprecation")
136    public void validateLinks(CmsObject cms) throws CmsException {
137
138        if (m_report == null) {
139            m_report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsExternalLinksValidator.class);
140        }
141
142        m_report.println(
143            Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_BEGIN_0),
144            I_CmsReport.FORMAT_HEADLINE);
145
146        // get all links
147
148        int pointerId = OpenCms.getResourceManager().getResourceType(
149            CmsResourceTypePointer.getStaticTypeName()).getTypeId();
150        CmsResourceFilter filter = CmsResourceFilter.ONLY_VISIBLE_NO_DELETED.addRequireType(pointerId);
151        List<CmsResource> links = cms.readResources("/", filter);
152        Iterator<CmsResource> iterator = links.iterator();
153        Map<String, String> brokenLinks = new HashMap<String, String>();
154
155        for (int i = 1; iterator.hasNext(); i++) {
156            CmsFile link = cms.readFile(cms.getSitePath(iterator.next()), filter);
157            String linkUrl = new String(link.getContents());
158
159            // print to the report
160            m_report.print(
161                org.opencms.report.Messages.get().container(
162                    org.opencms.report.Messages.RPT_SUCCESSION_1,
163                    new Integer(i),
164                    new Integer(links.size())),
165                I_CmsReport.FORMAT_NOTE);
166            m_report.print(Messages.get().container(Messages.RPT_VALIDATE_LINK_0), I_CmsReport.FORMAT_NOTE);
167            m_report.print(
168                org.opencms.report.Messages.get().container(
169                    org.opencms.report.Messages.RPT_ARGUMENT_1,
170                    link.getRootPath()));
171            m_report.print(Messages.get().container(Messages.GUI_LINK_POINTING_TO_0), I_CmsReport.FORMAT_NOTE);
172            m_report.print(
173                org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_ARGUMENT_1, linkUrl));
174            m_report.print(org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0));
175
176            // check link and append it to the list of broken links, if test fails
177            if (!checkUrl(cms, linkUrl)) {
178                brokenLinks.put(link.getRootPath(), linkUrl);
179                m_report.println(Messages.get().container(Messages.RPT_BROKEN_0), I_CmsReport.FORMAT_ERROR);
180            } else {
181                m_report.println(
182                    org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0),
183                    I_CmsReport.FORMAT_OK);
184            }
185        }
186
187        m_report.println(
188            Messages.get().container(
189                Messages.RPT_LINK_VALIDATION_STAT_2,
190                new Integer(links.size()),
191                new Integer(brokenLinks.size())),
192            I_CmsReport.FORMAT_HEADLINE);
193        m_report.println(
194            Messages.get().container(Messages.RPT_VALIDATE_EXTERNAL_LINKS_END_0),
195            I_CmsReport.FORMAT_HEADLINE);
196
197        OpenCms.getLinkManager().setPointerLinkValidationResult(new CmsExternalLinksValidationResult(brokenLinks));
198    }
199}