001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (C) Alkacon Software (https://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: https://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: https://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.site.xmlsitemap;
029
030import org.opencms.ade.configuration.CmsADEConfigData;
031import org.opencms.ade.configuration.CmsADEManager;
032import org.opencms.ade.detailpage.CmsDetailPageInfo;
033import org.opencms.db.CmsAlias;
034import org.opencms.file.CmsObject;
035import org.opencms.file.CmsProperty;
036import org.opencms.file.CmsPropertyDefinition;
037import org.opencms.file.CmsRequestContext;
038import org.opencms.file.CmsResource;
039import org.opencms.file.CmsResourceFilter;
040import org.opencms.file.CmsVfsResourceNotFoundException;
041import org.opencms.file.types.CmsResourceTypeHtmlRedirect;
042import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
043import org.opencms.file.types.I_CmsResourceType;
044import org.opencms.gwt.shared.alias.CmsAliasMode;
045import org.opencms.jsp.CmsJspNavBuilder;
046import org.opencms.jsp.CmsJspNavElement;
047import org.opencms.loader.CmsLoaderException;
048import org.opencms.loader.CmsResourceManager;
049import org.opencms.main.CmsException;
050import org.opencms.main.CmsLog;
051import org.opencms.main.OpenCms;
052import org.opencms.relations.CmsRelation;
053import org.opencms.relations.CmsRelationFilter;
054import org.opencms.relations.CmsRelationType;
055import org.opencms.site.CmsSite;
056import org.opencms.staticexport.CmsLinkManager;
057import org.opencms.util.CmsFileUtil;
058import org.opencms.util.CmsStringUtil;
059import org.opencms.util.CmsUUID;
060
061import java.net.URI;
062import java.net.URISyntaxException;
063import java.util.ArrayList;
064import java.util.Collection;
065import java.util.HashMap;
066import java.util.HashSet;
067import java.util.Iterator;
068import java.util.LinkedHashMap;
069import java.util.List;
070import java.util.Locale;
071import java.util.Map;
072import java.util.Set;
073
074import org.apache.commons.logging.Log;
075
076import com.google.common.collect.ArrayListMultimap;
077import com.google.common.collect.Multimap;
078
079/**
080 * Class for generating XML sitemaps for SEO purposes, as described in
081 * <a href="http://www.sitemaps.org/protocol.html">http://www.sitemaps.org/protocol.html</a>.<p>
082 */
083public class CmsXmlSitemapGenerator {
084
085    /**
086     * A bean that consists of a sitemap URL bean and a priority score, to determine which of multiple entries with the same
087     * URL are to be preferred.<p>
088     */
089    protected class ResultEntry {
090
091        /** Internal priority to determine which of multiple entries with the same URL is used.
092         * Note that this has nothing to do with the priority in the URL bean itself!
093         */
094        private int m_priority;
095
096        /** The URL bean. */
097        private CmsXmlSitemapUrlBean m_urlBean;
098
099        /**
100         * Creates a new result entry.<p>
101         *
102         * @param urlBean the url bean
103         *
104         * @param priority the internal priority
105         */
106        public ResultEntry(CmsXmlSitemapUrlBean urlBean, int priority) {
107
108            m_priority = priority;
109            m_urlBean = urlBean;
110        }
111
112        /**
113         * Gets the internal priority used to determine which of multiple entries with the same URL to use.<p>
114         * This has nothing to do with the priority defined in the URL beans themselves!
115         *
116         * @return the internal priority
117         */
118        public int getPriority() {
119
120            return m_priority;
121        }
122
123        /**
124         * Gets the URL bean.<p>
125         *
126         * @return the URL bean
127         */
128        public CmsXmlSitemapUrlBean getUrlBean() {
129
130            return m_urlBean;
131        }
132    }
133
    /** The default change frequency. */
    public static final String DEFAULT_CHANGE_FREQUENCY = "daily";

    /** The default priority. */
    public static final double DEFAULT_PRIORITY = 0.5;

    /**
     * Sitemap attribute to exclude empty detail pages relevant for settings only.
     * Its value is read from the ADE configuration and interpreted as a boolean by {@link #excludeDetailPage(CmsResource)}.
     */
    public static final String ATTR_DETAIL_SETTINGS_PAGE_EXCLUDE = "template.detailsettingspage.exclude";

    /** The logger instance for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlSitemapGenerator.class);

    /** The root path for the sitemap root folder (stored without trailing separator). */
    protected String m_baseFolderRootPath;

    /** The site path of the base folder. */
    protected String m_baseFolderSitePath;

    /**
     * Flag to control whether container page dates should be computed.
     * When it is not set, container pages are written with a modification date of -1.
     */
    protected boolean m_computeContainerPageDates;

    /** The list of detail page info beans. */
    protected List<CmsDetailPageInfo> m_detailPageInfos = new ArrayList<CmsDetailPageInfo>();

    /** A map from type names to lists of potential detail resources of that type. */
    protected Map<String, List<CmsResource>> m_detailResources = new HashMap<String, List<CmsResource>>();

    /**
     * A multimap from detail page root paths to corresponding types.
     * It is queried both with the root path of a page and with the path of its parent folder.
     */
    protected Multimap<String, String> m_detailTypesByPage = ArrayListMultimap.create();

    /** A CMS context with guest privileges. */
    protected CmsObject m_guestCms;

    /** The include/exclude configuration used for choosing pages for the XML sitemap. */
    protected CmsPathIncludeExcludeSet m_includeExcludeSet = new CmsPathIncludeExcludeSet();

    /** A map from structure ids to page aliases below the base folder which point to the given structure id. */
    protected Multimap<CmsUUID, CmsAlias> m_pageAliasesBelowBaseFolderByStructureId = ArrayListMultimap.create();

    /**
     * The map used for storing the results, with URLs as keys.
     * The keys are the URLs with any trailing separator removed; iteration follows insertion order.
     */
    protected Map<String, ResultEntry> m_resultMap = new LinkedHashMap<String, ResultEntry>();

    /** A guest user CMS object with the site root of the base folder. */
    protected CmsObject m_siteGuestCms;

    /** The site root of the base folder. */
    protected String m_siteRoot;

    /** A link to the site root; used as the prefix when building alias links. */
    protected String m_siteRootLink;

    /** Configured replacement server URL, set through {@link #setServerUrl(String)}. */
    private String m_serverUrl;
187
188    /**
189     * Creates a new sitemap generator instance.<p>
190     *
191     * @param folderRootPath the root folder for the XML sitemap to generate
192     *
193     * @throws CmsException if something goes wrong
194     */
195    public CmsXmlSitemapGenerator(String folderRootPath)
196    throws CmsException {
197
198        m_baseFolderRootPath = CmsFileUtil.removeTrailingSeparator(folderRootPath);
199        m_guestCms = OpenCms.initCmsObject(OpenCms.getDefaultUsers().getUserGuest());
200        m_siteGuestCms = OpenCms.initCmsObject(m_guestCms);
201        CmsSite site = OpenCms.getSiteManager().getSiteForRootPath(CmsStringUtil.joinPaths(folderRootPath, "/"));
202        m_siteRoot = site.getSiteRoot();
203
204        m_siteGuestCms.getRequestContext().setSiteRoot(m_siteRoot);
205        m_baseFolderSitePath = CmsStringUtil.joinPaths(
206            "/",
207            m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath));
208    }
209
210    /**
211     * Replaces the protocol/host/port of a link with the ones from the given server URI, if it's not empty.<p>
212     *
213     * @param link the link to change
214     * @param server the server URI string
215    
216     * @return the changed link
217     */
218    public static String replaceServerUri(String link, String server) {
219
220        String serverUriStr = server;
221
222        if (CmsStringUtil.isEmptyOrWhitespaceOnly(serverUriStr)) {
223            return link;
224        }
225        try {
226            URI serverUri = new URI(serverUriStr);
227            URI linkUri = new URI(link);
228            URI result = new URI(
229                serverUri.getScheme(),
230                serverUri.getAuthority(),
231                linkUri.getPath(),
232                linkUri.getQuery(),
233                linkUri.getFragment());
234            return result.toString();
235        } catch (URISyntaxException e) {
236            LOG.error(e.getLocalizedMessage(), e);
237            return link;
238        }
239
240    }
241
242    /**
243     * Gets the change frequency for a sitemap entry from a list of properties.<p>
244     *
245     * If the change frequency is not defined in the properties, this method will return null.<p>
246     *
247     * @param properties the properties from which the change frequency should be obtained
248     *
249     * @return the change frequency string
250     */
251    protected static String getChangeFrequency(List<CmsProperty> properties) {
252
253        CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_CHANGEFREQ, properties);
254        if (prop.isNullProperty()) {
255            return null;
256        }
257        String result = prop.getValue().trim();
258        return result;
259    }
260
261    /**
262     * Gets the page priority from a list of properties.<p>
263     *
264     * If the page priority can't be found among the properties, -1 will be returned.<p>
265     *
266     * @param properties the properties of a resource
267     *
268     * @return the page priority read from the properties, or -1
269     */
270    protected static double getPriority(List<CmsProperty> properties) {
271
272        CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_PRIORITY, properties);
273        if (prop.isNullProperty()) {
274            return -1.0;
275        }
276        try {
277            double result = Double.parseDouble(prop.getValue().trim());
278            return result;
279        } catch (NumberFormatException e) {
280            return -1.0;
281        }
282    }
283
284    /**
285     * Removes files marked as internal from a resource list.<p>
286     *
287     * @param resources the list which should be replaced
288     */
289    protected static void removeInternalFiles(List<CmsResource> resources) {
290
291        Iterator<CmsResource> iter = resources.iterator();
292        while (iter.hasNext()) {
293            CmsResource resource = iter.next();
294            if (resource.isInternal()) {
295                iter.remove();
296            }
297        }
298    }
299
300    /**
301     * Generates a list of XML sitemap entry beans for the root folder which has been set in the constructor.<p>
302     *
303     * @return the list of XML sitemap entries
304     *
305     * @throws CmsException if something goes wrong
306     */
307    public List<CmsXmlSitemapUrlBean> generateSitemapBeans() throws CmsException {
308
309        String baseSitePath = m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath);
310        initializeFileData(baseSitePath);
311        for (CmsResource resource : getDirectPages()) {
312            if (CmsResourceTypeHtmlRedirect.isRedirect(resource)) {
313                continue;
314            }
315            String sitePath = m_siteGuestCms.getSitePath(resource);
316            List<CmsProperty> propertyList = m_siteGuestCms.readPropertyObjects(resource, true);
317            String onlineLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, sitePath);
318            boolean isContainerPage = CmsResourceTypeXmlContainerPage.isContainerPage(resource);
319            long dateModified = resource.getDateLastModified();
320            if (isContainerPage) {
321                if (m_computeContainerPageDates) {
322                    dateModified = computeContainerPageModificationDate(resource);
323                } else {
324                    dateModified = -1;
325                }
326            }
327            CmsXmlSitemapUrlBean urlBean = new CmsXmlSitemapUrlBean(
328                replaceServerUri(onlineLink),
329                dateModified,
330                getChangeFrequency(propertyList),
331                getPriority(propertyList));
332            urlBean.setOriginalResource(resource);
333            boolean isDefaultDetailPage = isDefaultDetailPage(resource);
334            List<I_CmsResourceType> types = getDetailTypesForPage(resource);
335            if (isDefaultDetailPage) { // default detail page
336                if (!excludeDetailPage(resource)) {
337                    addResult(urlBean, 3);
338                }
339            } else if (types.isEmpty()) { // not a detail page
340                addResult(urlBean, 3);
341            } else { // typed detail page
342                if (!excludeDetailPage(resource)) {
343                    addResult(urlBean, 3);
344                }
345                Locale locale = getLocale(resource, propertyList);
346                addDetailLinks(resource, locale, types);
347            }
348        }
349
350        for (CmsUUID aliasStructureId : m_pageAliasesBelowBaseFolderByStructureId.keySet()) {
351            addAliasLinks(aliasStructureId);
352        }
353
354        List<CmsXmlSitemapUrlBean> result = new ArrayList<CmsXmlSitemapUrlBean>();
355        for (ResultEntry resultEntry : m_resultMap.values()) {
356            result.add(resultEntry.getUrlBean());
357        }
358        return result;
359    }
360
361    /**
362     * Gets the include/exclude configuration of this XML sitemap generator.<p>
363     *
364     * @return the include/exclude configuration
365     */
366    public CmsPathIncludeExcludeSet getIncludeExcludeSet() {
367
368        return m_includeExcludeSet;
369    }
370
371    /**
372     * Generates a sitemap and formats it as a string.<p>
373     *
374     * @return the sitemap XML data
375     *
376     * @throws CmsException if something goes wrong
377     */
378    public String renderSitemap() throws CmsException {
379
380        StringBuffer buffer = new StringBuffer();
381        List<CmsXmlSitemapUrlBean> urlBeans = generateSitemapBeans();
382        buffer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
383        buffer.append(getUrlSetOpenTag() + "\n");
384        for (CmsXmlSitemapUrlBean bean : urlBeans) {
385            buffer.append(getXmlForEntry(bean));
386            buffer.append("\n");
387        }
388        buffer.append("</urlset>");
389        return buffer.toString();
390    }
391
392    /**
393     * Enables or disables computation of container page dates.<p>
394     *
395     * @param computeContainerPageDates the new value
396     */
397    public void setComputeContainerPageDates(boolean computeContainerPageDates) {
398
399        m_computeContainerPageDates = computeContainerPageDates;
400    }
401
402    /**
403     * Sets the replacement server URL.<p>
404     *
405     * The replacement server URL will replace the scheme/host/port from the URLs returned by getOnlineLink.
406     *
407     * @param serverUrl the server URL
408     */
409    public void setServerUrl(String serverUrl) {
410
411        m_serverUrl = serverUrl;
412    }
413
414    /**
415     * Adds the detail page links for a given page to the results.<p>
416     *
417     * @param containerPage the container page resource
418     * @param locale the locale of the container page
419     *
420     * @throws CmsException if something goes wrong
421     */
422    protected void addDetailLinks(CmsResource containerPage, Locale locale) throws CmsException {
423
424        List<I_CmsResourceType> types = getDetailTypesForPage(containerPage);
425        addDetailLinks(containerPage, locale, types);
426    }
427
428    /**
429     * Adds an URL bean to the internal map of results, but only if there is no existing entry with higher internal priority
430     * than the priority given as an argument.<p>
431     *
432     * @param result the result URL bean to add
433     *
434     * @param resultPriority the internal priority to use for updating the map of results
435     */
436    protected void addResult(CmsXmlSitemapUrlBean result, int resultPriority) {
437
438        String url = CmsFileUtil.removeTrailingSeparator(result.getUrl());
439        boolean writeEntry = true;
440        if (m_resultMap.containsKey(url)) {
441            LOG.warn("Encountered duplicate URL with while generating sitemap: " + result.getUrl());
442            ResultEntry entry = m_resultMap.get(url);
443            writeEntry = entry.getPriority() <= resultPriority;
444        }
445        if (writeEntry) {
446            m_resultMap.put(url, new ResultEntry(result, resultPriority));
447        }
448    }
449
450    /**
451     * Computes the container the container page modification date from its referenced contents.<p>
452     *
453     * @param containerPage the container page
454     *
455     * @return the computed modification date
456     *
457     * @throws CmsException if something goes wrong
458     */
459    protected long computeContainerPageModificationDate(CmsResource containerPage) throws CmsException {
460
461        CmsRelationFilter filter = CmsRelationFilter.relationsFromStructureId(
462            containerPage.getStructureId()).filterType(CmsRelationType.XML_STRONG);
463        List<CmsRelation> relations = m_guestCms.readRelations(filter);
464        long result = containerPage.getDateLastModified();
465        for (CmsRelation relation : relations) {
466            try {
467                CmsResource target = relation.getTarget(
468                    m_guestCms,
469                    CmsResourceFilter.DEFAULT_FILES.addRequireVisible());
470                long targetDate = target.getDateLastModified();
471                if (targetDate > result) {
472                    result = targetDate;
473                }
474            } catch (CmsException e) {
475                LOG.warn(
476                    "Could not get relation target for relation "
477                        + relation.toString()
478                        + " | "
479                        + e.getLocalizedMessage(),
480                    e);
481            }
482        }
483
484        return result;
485    }
486
487    /**
488     * Returns whether to exclude the given detail page.
489     * @param detailPage the detail page
490     * @return whether to exclude the given detail page
491     */
492    protected boolean excludeDetailPage(CmsResource detailPage) {
493
494        CmsADEConfigData adeConfigData = OpenCms.getADEManager().lookupConfigurationWithCache(
495            m_guestCms,
496            detailPage.getRootPath());
497        String exclude = adeConfigData.getAttribute(ATTR_DETAIL_SETTINGS_PAGE_EXCLUDE, null);
498        return Boolean.valueOf(exclude);
499    }
500
501    /**
502     * Gets the detail link for a given container page and detail content.<p>
503     *
504     * Note: The actual container page used for the result link is not necessarily the container page passed
505     * in as parameter - the default detail page in the sitemap containing the page is used.
506     *
507     * @param pageRes the container page
508     * @param detailRes the detail content
509     * @param locale the locale for which we want the link
510     *
511     * @return the detail page link
512     */
513    protected String getDetailLink(CmsResource pageRes, CmsResource detailRes, Locale locale) {
514
515        String pageSitePath = m_siteGuestCms.getSitePath(pageRes);
516        String detailSitePath = m_siteGuestCms.getSitePath(detailRes);
517        CmsRequestContext requestContext = m_siteGuestCms.getRequestContext();
518        String originalUri = requestContext.getUri();
519        Locale originalLocale = requestContext.getLocale();
520        try {
521            requestContext.setUri(pageSitePath);
522            requestContext.setLocale(locale);
523            return OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, detailSitePath, true);
524        } finally {
525            requestContext.setUri(originalUri);
526            requestContext.setLocale(originalLocale);
527        }
528    }
529
530    /**
531     * Gets the types for which a given resource is configured as a detail page.<p>
532     *
533     * @param resource a resource for which we want to find the detail page types
534     *
535     * @return the list of resource types for which the given page is configured as a detail page
536     */
537    protected List<I_CmsResourceType> getDetailTypesForPage(CmsResource resource) {
538
539        Collection<String> typesForPage = m_detailTypesByPage.get(resource.getRootPath());
540        String parentPath = CmsFileUtil.removeTrailingSeparator(CmsResource.getParentFolder(resource.getRootPath()));
541        Collection<String> typesForFolder = m_detailTypesByPage.get(parentPath);
542        Set<String> allTypes = new HashSet<String>();
543        allTypes.addAll(typesForPage);
544        allTypes.addAll(typesForFolder);
545        List<I_CmsResourceType> resTypes = new ArrayList<I_CmsResourceType>();
546        CmsResourceManager resMan = OpenCms.getResourceManager();
547        for (String typeName : allTypes) {
548            if (typeName.startsWith(CmsDetailPageInfo.FUNCTION_PREFIX)) {
549                continue;
550            }
551            try {
552                I_CmsResourceType resType = resMan.getResourceType(typeName);
553                resTypes.add(resType);
554            } catch (CmsLoaderException e) {
555                LOG.warn("Invalid resource type name" + typeName + "! " + e.getLocalizedMessage(), e);
556            }
557        }
558        return resTypes;
559    }
560
561    /**
562     * Gets the list of pages which should be directly added to the XML sitemap.<p>
563     *
564     * @return the list of resources which should be directly added to the XML sitemap
565     *
566     * @throws CmsException if something goes wrong
567     */
568    protected List<CmsResource> getDirectPages() throws CmsException {
569
570        List<CmsResource> result = new ArrayList<CmsResource>();
571        result.addAll(getNavigationPages());
572        Set<String> includeRoots = m_includeExcludeSet.getIncludeRoots();
573        for (String includeRoot : includeRoots) {
574            try {
575                CmsResource resource = m_guestCms.readResource(includeRoot);
576                if (resource.isFile()) {
577                    result.add(resource);
578                } else {
579                    List<CmsResource> subtreeFiles = m_guestCms.readResources(
580                        includeRoot,
581                        CmsResourceFilter.DEFAULT_FILES,
582                        true);
583                    result.addAll(subtreeFiles);
584                }
585            } catch (CmsVfsResourceNotFoundException e) {
586                LOG.warn("Could not read include resource: " + includeRoot);
587            }
588        }
589        Iterator<CmsResource> filterIter = result.iterator();
590        while (filterIter.hasNext()) {
591            CmsResource currentResource = filterIter.next();
592            if (currentResource.isInternal() || m_includeExcludeSet.isExcluded(currentResource.getRootPath())) {
593                filterIter.remove();
594            }
595        }
596        return result;
597    }
598
599    /**
600     * Writes the inner node content for an url element to a buffer.<p>
601     *
602     * @param entry the entry for which the content should be written
603     * @return the inner XML
604     */
605    protected String getInnerXmlForEntry(CmsXmlSitemapUrlBean entry) {
606
607        StringBuffer buffer = new StringBuffer();
608        entry.writeElement(buffer, "loc", entry.getUrl());
609        entry.writeLastmod(buffer);
610        entry.writeChangefreq(buffer);
611        entry.writePriority(buffer);
612        return buffer.toString();
613    }
614
615    /**
616     * Gets the list of pages from the navigation which should be directly added to the XML sitemap.<p>
617     *
618     * @return the list of pages to add to the XML sitemap
619     */
620    protected List<CmsResource> getNavigationPages() {
621
622        List<CmsResource> result = new ArrayList<CmsResource>();
623        CmsJspNavBuilder navBuilder = new CmsJspNavBuilder(m_siteGuestCms);
624        try {
625            CmsResource rootDefaultFile = m_siteGuestCms.readDefaultFile(
626                m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath),
627                CmsResourceFilter.DEFAULT);
628            if (rootDefaultFile != null) {
629                result.add(rootDefaultFile);
630            }
631        } catch (Exception e) {
632            LOG.info(e.getLocalizedMessage(), e);
633        }
634        List<CmsJspNavElement> navElements = navBuilder.getSiteNavigation(
635            m_baseFolderSitePath,
636            CmsJspNavBuilder.Visibility.includeHidden,
637            -1);
638        for (CmsJspNavElement navElement : navElements) {
639            CmsResource navResource = navElement.getResource();
640            if (navResource.isFolder()) {
641                try {
642                    CmsResource defaultFile = m_guestCms.readDefaultFile(navResource, CmsResourceFilter.DEFAULT_FILES);
643                    if (defaultFile != null) {
644                        result.add(defaultFile);
645                    } else {
646                        LOG.warn("Could not get default file for " + navResource.getRootPath());
647                    }
648                } catch (CmsException e) {
649                    LOG.warn("Could not get default file for " + navResource.getRootPath());
650                }
651            } else {
652                result.add(navResource);
653            }
654        }
655        return result;
656    }
657
658    /**
659     * Gets the opening tag for the urlset element (can be overridden to add e.g. more namespaces.<p>
660     *
661     * @return the opening tag
662     */
663    protected String getUrlSetOpenTag() {
664
665        return "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">";
666    }
667
668    /**
669     * Writes the XML for an URL entry to a buffer.<p>
670     *
671     * @param entry the XML sitemap entry bean
672     *
673     * @return an XML representation of this bean
674     */
675    protected String getXmlForEntry(CmsXmlSitemapUrlBean entry) {
676
677        StringBuffer buffer = new StringBuffer();
678        buffer.append("<url>");
679        buffer.append(getInnerXmlForEntry(entry));
680        buffer.append("</url>");
681        return buffer.toString();
682    }
683
684    /**
685     * Checks whether the given alias is below the base folder.<p>
686     *
687     * @param alias the alias to check
688     *
689     * @return true if the alias is below the base folder
690     */
691    protected boolean isAliasBelowBaseFolder(CmsAlias alias) {
692
693        boolean isBelowBaseFolder = CmsStringUtil.isPrefixPath(m_baseFolderSitePath, alias.getAliasPath());
694        return isBelowBaseFolder;
695    }
696
697    /**
698     * Returns whether the given page is a default detail page.
699     * @param resource the page resource
700     * @return whether the given page is a default detail page
701     */
702    protected boolean isDefaultDetailPage(CmsResource resource) {
703
704        Collection<String> typesForPage = m_detailTypesByPage.get(resource.getRootPath());
705        String parentPath = CmsFileUtil.removeTrailingSeparator(CmsResource.getParentFolder(resource.getRootPath()));
706        Collection<String> typesForFolder = m_detailTypesByPage.get(parentPath);
707        Set<String> allTypes = new HashSet<String>();
708        allTypes.addAll(typesForPage);
709        allTypes.addAll(typesForFolder);
710        for (String typeName : allTypes) {
711            if (typeName.equals(CmsADEManager.DEFAULT_DETAILPAGE_TYPE)) {
712                return true;
713            }
714        }
715        return false;
716    }
717
718    /**
719     * Checks whether the page/detail content combination is a valid detail page.<p>
720     *
721     * @param page the container page
722     * @param locale the locale
723     * @param detailRes the detail content resource
724     *
725     * @return true if this is a valid detail page combination
726     */
727    protected boolean isValidDetailPageCombination(CmsResource page, Locale locale, CmsResource detailRes) {
728
729        return OpenCms.getADEManager().getDetailPageHandler().isValidDetailPage(m_guestCms, page, detailRes);
730    }
731
732    /**
733     * Replaces the protocol/host/port of a link with the ones from the configured server URI, if it's not empty.<p>
734     *
735     * @param link the link to change
736     *
737     * @return the changed link
738     */
739    protected String replaceServerUri(String link) {
740
741        return replaceServerUri(link, m_serverUrl);
742    }
743
744    /**
745     * Adds the alias links for a given structure id to the results.<p>
746     *
747     * @param aliasStructureId the alias target structure id
748     */
749    private void addAliasLinks(CmsUUID aliasStructureId) {
750
751        try {
752            CmsResource aliasTarget = m_guestCms.readResource(aliasStructureId);
753            List<CmsProperty> properties = m_guestCms.readPropertyObjects(aliasTarget, true);
754            double priority = getPriority(properties);
755            String changeFrequency = getChangeFrequency(properties);
756            Collection<CmsAlias> aliases = m_pageAliasesBelowBaseFolderByStructureId.get(aliasStructureId);
757            for (CmsAlias alias : aliases) {
758                String aliasLink = (m_siteRootLink + "/" + alias.getAliasPath()).replaceAll("(?<!:)//+", "/");
759                CmsXmlSitemapUrlBean aliasUrlBean = new CmsXmlSitemapUrlBean(
760                    replaceServerUri(aliasLink),
761                    -1,
762                    changeFrequency,
763                    priority);
764                aliasUrlBean.setOriginalResource(aliasTarget);
765                addResult(aliasUrlBean, 1);
766            }
767        } catch (CmsException e) {
768            LOG.error(e.getLocalizedMessage(), e);
769        }
770    }
771
772    /**
773     * Adds the detail page links for a given page to the results.<p>
774     *
775     * @param containerPage the container page resource
776     * @param locale the locale of the container page
777     * @param types the detail types
778     *
779     * @throws CmsException if something goes wrong
780     */
781    private void addDetailLinks(CmsResource containerPage, Locale locale, List<I_CmsResourceType> types)
782    throws CmsException {
783
784        for (I_CmsResourceType type : types) {
785            List<CmsResource> resourcesForType = getDetailResources(type);
786            for (CmsResource detailRes : resourcesForType) {
787                if (!isValidDetailPageCombination(containerPage, locale, detailRes)) {
788                    continue;
789                }
790                List<CmsProperty> detailProps = m_guestCms.readPropertyObjects(detailRes, true);
791                String detailLink = getDetailLink(containerPage, detailRes, locale);
792                String detailLinkRootPath = detailLink;
793                try {
794                    detailLinkRootPath = (new URI(detailLink)).getPath();
795                    detailLinkRootPath = CmsLinkManager.removeOpenCmsContext(detailLinkRootPath);
796                    detailLinkRootPath = m_siteGuestCms.addSiteRoot(detailLinkRootPath);
797                } catch (URISyntaxException e) {
798                    // should not happen
799                }
800                if (!m_includeExcludeSet.isExcluded(detailLinkRootPath)) {
801                    CmsXmlSitemapUrlBean detailUrlBean = new CmsXmlSitemapUrlBean(
802                        replaceServerUri(detailLink),
803                        detailRes.getDateLastModified(),
804                        getChangeFrequency(detailProps),
805                        getPriority(detailProps));
806                    detailUrlBean.setOriginalResource(detailRes);
807                    detailUrlBean.setDetailPageResource(containerPage);
808                    addResult(detailUrlBean, 2);
809                }
810            }
811        }
812    }
813
814    /**
815     * Gets all resources from the folder tree beneath the base folder or the shared folder which have a given type.<p>
816     *
817     * @param type the type to filter by
818     *
819     * @return the list of resources with the given type
820     *
821     * @throws CmsException if something goes wrong
822     */
823    private List<CmsResource> getDetailResources(I_CmsResourceType type) throws CmsException {
824
825        String typeName = type.getTypeName();
826        if (!m_detailResources.containsKey(typeName)) {
827            List<CmsResource> result = new ArrayList<CmsResource>();
828            CmsResourceFilter filter = CmsResourceFilter.DEFAULT_FILES.addRequireType(type);
829            List<CmsResource> siteFiles = m_guestCms.readResources(m_siteRoot, filter, true);
830            result.addAll(siteFiles);
831            String shared = CmsFileUtil.removeTrailingSeparator(OpenCms.getSiteManager().getSharedFolder());
832            if (shared != null) {
833                List<CmsResource> sharedFiles = m_guestCms.readResources(shared, filter, true);
834                result.addAll(sharedFiles);
835            }
836            m_detailResources.put(typeName, result);
837        }
838        return m_detailResources.get(typeName);
839    }
840
841    /**
842     * Gets the locale to use for the given resource.<p>
843     *
844     * @param resource the resource
845     * @param propertyList the properties of the resource
846     *
847     * @return the locale to use for the given resource
848     */
849    private Locale getLocale(CmsResource resource, List<CmsProperty> propertyList) {
850
851        return OpenCms.getLocaleManager().getDefaultLocale(m_guestCms, m_guestCms.getSitePath(resource));
852    }
853
854    /**
855     * Reads the data necessary for building the sitemap from the VFS and initializes the internal data structures.<p>
856     *
857     * @param baseSitePath the base site path
858     *
859     * @throws CmsException if something goes wrong
860     */
861    private void initializeFileData(String baseSitePath) throws CmsException {
862
863        m_resultMap.clear();
864        m_siteRootLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, "/");
865        m_siteRootLink = CmsFileUtil.removeTrailingSeparator(m_siteRootLink);
866        m_detailPageInfos = OpenCms.getADEManager().getAllDetailPages(m_guestCms);
867        for (CmsDetailPageInfo detailPageInfo : m_detailPageInfos) {
868            String type = detailPageInfo.getType();
869            String path = detailPageInfo.getUri();
870            path = CmsFileUtil.removeTrailingSeparator(path);
871            m_detailTypesByPage.put(path, type);
872        }
873        List<CmsAlias> siteAliases = OpenCms.getAliasManager().getAliasesForSite(
874            m_siteGuestCms,
875            m_siteGuestCms.getRequestContext().getSiteRoot());
876        for (CmsAlias alias : siteAliases) {
877            if (isAliasBelowBaseFolder(alias) && (alias.getMode() == CmsAliasMode.page)) {
878                CmsUUID aliasId = alias.getStructureId();
879                m_pageAliasesBelowBaseFolderByStructureId.put(aliasId, alias);
880            }
881        }
882
883    }
884}