001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.site.xmlsitemap;
029
030import org.opencms.ade.configuration.CmsADEConfigData.DetailInfo;
031import org.opencms.file.CmsProperty;
032import org.opencms.file.CmsResource;
033import org.opencms.file.CmsResourceFilter;
034import org.opencms.file.types.I_CmsResourceType;
035import org.opencms.main.CmsException;
036import org.opencms.main.CmsLog;
037import org.opencms.main.OpenCms;
038import org.opencms.util.CmsFileUtil;
039import org.opencms.util.CmsPathMap;
040import org.opencms.util.CmsStringUtil;
041
042import java.util.ArrayList;
043import java.util.Collection;
044import java.util.Collections;
045import java.util.Comparator;
046import java.util.HashMap;
047import java.util.List;
048import java.util.Locale;
049
050import org.apache.commons.logging.Log;
051
052import com.google.common.collect.ArrayListMultimap;
053import com.google.common.collect.Lists;
054import com.google.common.collect.Maps;
055import com.google.common.collect.Multimap;
056
057/**
058 * Sitemap generator class which tries to eliminate duplicate detail pages for the same content and locale.<p>
059 *
060 * In principle, any detail page for a type somewhere in the system could be used to display contents anywhere
061 * else in the system. This sitemap generator, instead of generating all detail page URLs that could possibly be generated,
062 * instead tries to find only the best candidate URL for each content / locale combination.
063 */
064public class CmsDetailPageDuplicateEliminatingSitemapGenerator extends CmsXmlSitemapGenerator {
065
066    /** The logger instance for this class. */
067    private static final Log LOG = CmsLog.getLog(CmsDetailPageDuplicateEliminatingSitemapGenerator.class);
068
069    /** The detail page information. */
070    protected List<DetailInfo> m_detailInfos = new ArrayList<DetailInfo>();
071
072    /** Multimap of detail infos with the detail page as key. */
073    private Multimap<String, DetailInfo> m_detailInfosByPage;
074
075    /** Cache for path maps containing the content resources. */
076    private HashMap<String, CmsPathMap<CmsResource>> m_pathMapsByType = Maps.newHashMap();
077
078    /**
079     * Constructor.<p>
080     *
081     * @param sitemapPath the sitemap path
082     * @throws CmsException if something goes wrong
083     */
084    public CmsDetailPageDuplicateEliminatingSitemapGenerator(String sitemapPath)
085    throws CmsException {
086        super(sitemapPath);
087        List<DetailInfo> rawDetailInfo = OpenCms.getADEManager().getDetailInfo(m_guestCms);
088        List<DetailInfo> filteredDetailInfo = Lists.newArrayList();
089        for (DetailInfo item : rawDetailInfo) {
090            String path = item.getFolderPath();
091            if (OpenCms.getSiteManager().startsWithShared(path) || CmsStringUtil.isPrefixPath(m_siteRoot, path)) {
092                filteredDetailInfo.add(item);
093            } else {
094                if (LOG.isDebugEnabled()) {
095                    LOG.debug("Filtered detail info: " + item);
096                }
097            }
098        }
099        m_detailInfos = filteredDetailInfo;
100
101    }
102
103    /**
104     * @see org.opencms.site.xmlsitemap.CmsXmlSitemapGenerator#generateSitemapBeans()
105     */
106    @Override
107    public List<CmsXmlSitemapUrlBean> generateSitemapBeans() throws CmsException {
108
109        List<CmsXmlSitemapUrlBean> parentResult = super.generateSitemapBeans();
110        List<CmsXmlSitemapUrlBean> result = Lists.newArrayList();
111        Multimap<String, CmsXmlSitemapUrlBean> detailPageBeans = ArrayListMultimap.create();
112
113        // We want to eliminate duplicate detail pages for the same detail content and locale,
114        // so first we group the XML sitemap beans belonging to detail pages by their locale/content combination,
115        // and then we sort each group by the sitemap configuration where the detail page is coming from,
116        // and then only take the last element in each group.
117
118        for (CmsXmlSitemapUrlBean urlBean : parentResult) {
119            if (urlBean.getDetailPageResource() == null) {
120                result.add(urlBean);
121            } else {
122                String localeKey = urlBean.getOriginalResource().getStructureId() + "_" + urlBean.getLocale();
123                detailPageBeans.put(localeKey, urlBean);
124            }
125        }
126        Comparator<CmsXmlSitemapUrlBean> pathComparator = new Comparator<CmsXmlSitemapUrlBean>() {
127
128            public int compare(CmsXmlSitemapUrlBean urlbean1, CmsXmlSitemapUrlBean urlbean2) {
129
130                String subsite1 = urlbean1.getSubsite();
131                if (subsite1 == null) {
132                    subsite1 = "";
133                }
134                String subsite2 = urlbean2.getSubsite();
135                if (subsite2 == null) {
136                    subsite2 = "";
137                }
138                return subsite1.compareTo(subsite2);
139            }
140        };
141        for (String key : detailPageBeans.keySet()) {
142            result.add(Collections.max(detailPageBeans.get(key), pathComparator));
143        }
144        return result;
145    }
146
147    /**
148     * @see org.opencms.site.xmlsitemap.CmsXmlSitemapGenerator#addDetailLinks(org.opencms.file.CmsResource, java.util.Locale)
149     */
150    @Override
151    protected void addDetailLinks(CmsResource containerPage, Locale locale) throws CmsException {
152
153        Collection<DetailInfo> detailInfos = getDetailInfosForPage(containerPage);
154        for (DetailInfo info : detailInfos) {
155            List<CmsResource> contents = getContents(info.getFolderPath(), info.getType());
156            for (CmsResource detailRes : contents) {
157                List<CmsProperty> detailProps = m_guestCms.readPropertyObjects(detailRes, true);
158                String detailLink = getDetailLink(containerPage, detailRes, locale);
159                detailLink = CmsFileUtil.removeTrailingSeparator(detailLink);
160                CmsXmlSitemapUrlBean detailUrlBean = new CmsXmlSitemapUrlBean(
161                    replaceServerUri(detailLink),
162                    detailRes.getDateLastModified(),
163                    getChangeFrequency(detailProps),
164                    getPriority(detailProps));
165                detailUrlBean.setLocale(locale);
166                detailUrlBean.setOriginalResource(detailRes);
167                detailUrlBean.setDetailPageResource(containerPage);
168                detailUrlBean.setSubsite(info.getBasePath());
169                addResult(detailUrlBean, 2);
170            }
171        }
172    }
173
174    /**
175     * Gets the contents for the given folder path and type name.<p>
176     *
177     * @param folderPath the content folder path
178     * @param type the type name
179     * @return the list of contents
180     *
181     * @throws CmsException if something goes wrong
182     */
183    private List<CmsResource> getContents(String folderPath, String type) throws CmsException {
184
185        CmsPathMap<CmsResource> pathMap = getPathMapForType(type);
186        return pathMap.getChildValues(folderPath);
187    }
188
189    /**
190     * Gets the detail information for the given container page.<p>
191     *
192     * @param containerPage the container page
193     * @return the detail information
194     */
195    private Collection<DetailInfo> getDetailInfosForPage(CmsResource containerPage) {
196
197        if (m_detailInfosByPage == null) {
198            m_detailInfosByPage = ArrayListMultimap.create();
199            for (DetailInfo detailInfo : m_detailInfos) {
200                m_detailInfosByPage.put(detailInfo.getDetailPageInfo().getUri(), detailInfo);
201            }
202        }
203        String folderPath = CmsResource.getParentFolder(containerPage.getRootPath());
204        Collection<DetailInfo> result = m_detailInfosByPage.get(containerPage.getRootPath());
205        if (result.isEmpty()) {
206            result = m_detailInfosByPage.get(folderPath);
207        }
208        return result;
209    }
210
211    /**
212     * Gets the path map containing the contents for the given type.<p>
213     *
214     * @param typeName the type name
215     * @return the path map with the content resources
216     *
217     * @throws CmsException if something goes wrong
218     */
219    private CmsPathMap<CmsResource> getPathMapForType(String typeName) throws CmsException {
220
221        if (!m_pathMapsByType.containsKey(typeName)) {
222            CmsPathMap<CmsResource> pathMap = readPathMapForType(
223                OpenCms.getResourceManager().getResourceType(typeName));
224            m_pathMapsByType.put(typeName, pathMap);
225        }
226        return m_pathMapsByType.get(typeName);
227    }
228
229    /**
230     * Reads the contents of a given type and stores them in a path map.<p>
231     *
232     * @param type the type for which to read the contents
233     * @return the path map containing the contents
234     */
235    private CmsPathMap<CmsResource> readPathMapForType(I_CmsResourceType type) {
236
237        List<CmsResource> result = new ArrayList<CmsResource>();
238        CmsResourceFilter filter = CmsResourceFilter.DEFAULT_FILES.addRequireType(type);
239        try {
240            List<CmsResource> siteFiles = m_guestCms.readResources(m_siteRoot, filter, true);
241            result.addAll(siteFiles);
242        } catch (CmsException e) {
243            LOG.error("XML sitemap generator error: " + e.getLocalizedMessage(), e);
244        }
245        String shared = CmsFileUtil.removeTrailingSeparator(OpenCms.getSiteManager().getSharedFolder());
246        if (shared != null) {
247            try {
248                List<CmsResource> sharedFiles = m_guestCms.readResources(shared, filter, true);
249                result.addAll(sharedFiles);
250            } catch (CmsException e) {
251                LOG.error("XML sitemap generator error: " + e.getLocalizedMessage(), e);
252            }
253        }
254        CmsPathMap<CmsResource> resultMap = new CmsPathMap<CmsResource>();
255        for (CmsResource resource : result) {
256            resultMap.add(resource.getRootPath(), resource);
257        }
258        return resultMap;
259    }
260
261}