001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (C) Alkacon Software (https://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software, please see the 018 * company website: https://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: https://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.site.xmlsitemap; 029 030import org.opencms.ade.configuration.CmsADEConfigData; 031import org.opencms.ade.configuration.CmsADEManager; 032import org.opencms.ade.detailpage.CmsDetailPageInfo; 033import org.opencms.db.CmsAlias; 034import org.opencms.file.CmsObject; 035import org.opencms.file.CmsProperty; 036import org.opencms.file.CmsPropertyDefinition; 037import org.opencms.file.CmsRequestContext; 038import org.opencms.file.CmsResource; 039import org.opencms.file.CmsResourceFilter; 040import org.opencms.file.CmsVfsResourceNotFoundException; 041import org.opencms.file.types.CmsResourceTypeHtmlRedirect; 042import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 043import org.opencms.file.types.I_CmsResourceType; 044import org.opencms.gwt.shared.alias.CmsAliasMode; 045import org.opencms.jsp.CmsJspNavBuilder; 046import org.opencms.jsp.CmsJspNavElement; 047import org.opencms.loader.CmsLoaderException; 048import org.opencms.loader.CmsResourceManager; 049import org.opencms.main.CmsException; 050import org.opencms.main.CmsLog; 051import org.opencms.main.OpenCms; 052import org.opencms.relations.CmsRelation; 053import org.opencms.relations.CmsRelationFilter; 054import org.opencms.relations.CmsRelationType; 055import org.opencms.site.CmsSite; 056import org.opencms.staticexport.CmsLinkManager; 057import org.opencms.util.CmsFileUtil; 058import org.opencms.util.CmsStringUtil; 059import org.opencms.util.CmsUUID; 060 061import java.net.URI; 062import java.net.URISyntaxException; 063import java.util.ArrayList; 064import java.util.Collection; 065import java.util.HashMap; 066import java.util.HashSet; 067import java.util.Iterator; 068import java.util.LinkedHashMap; 069import java.util.List; 070import java.util.Locale; 071import java.util.Map; 072import java.util.Set; 073 074import org.apache.commons.logging.Log; 075 076import com.google.common.collect.ArrayListMultimap; 077import com.google.common.collect.Multimap; 078 079/** 080 * Class for generating XML sitemaps for SEO purposes, as described in 081 * <a href="http://www.sitemaps.org/protocol.html">http://www.sitemaps.org/protocol.html</a>.<p> 082 */ 083public class CmsXmlSitemapGenerator { 084 085 /** 086 * A bean that consists of a sitemap URL bean and a priority score, to determine which of multiple entries with the same 087 * URL are to be preferred.<p> 088 */ 089 protected class ResultEntry { 090 091 /** Internal priority to determine which of multiple entries with the same URL is used. 092 * Note that this has nothing to do with the priority in the URL bean itself! 093 */ 094 private int m_priority; 095 096 /** The URL bean. */ 097 private CmsXmlSitemapUrlBean m_urlBean; 098 099 /** 100 * Creates a new result entry.<p> 101 * 102 * @param urlBean the url bean 103 * 104 * @param priority the internal priority 105 */ 106 public ResultEntry(CmsXmlSitemapUrlBean urlBean, int priority) { 107 108 m_priority = priority; 109 m_urlBean = urlBean; 110 } 111 112 /** 113 * Gets the internal priority used to determine which of multiple entries with the same URL to use.<p> 114 * This has nothing to do with the priority defined in the URL beans themselves! 115 * 116 * @return the internal priority 117 */ 118 public int getPriority() { 119 120 return m_priority; 121 } 122 123 /** 124 * Gets the URL bean.<p> 125 * 126 * @return the URL bean 127 */ 128 public CmsXmlSitemapUrlBean getUrlBean() { 129 130 return m_urlBean; 131 } 132 } 133 134 /** The default change frequency. */ 135 public static final String DEFAULT_CHANGE_FREQUENCY = "daily"; 136 137 /** The default priority. */ 138 public static final double DEFAULT_PRIORITY = 0.5; 139 140 /** Sitemap attribute to exclude empty detail pages relevant for settings only. */ 141 public static final String ATTR_DETAIL_SETTINGS_PAGE_EXCLUDE = "template.detailsettingspage.exclude"; 142 143 /** The logger instance for this class. */ 144 private static final Log LOG = CmsLog.getLog(CmsXmlSitemapGenerator.class); 145 146 /** The root path for the sitemap root folder. */ 147 protected String m_baseFolderRootPath; 148 149 /** The site path of the base folder. */ 150 protected String m_baseFolderSitePath; 151 152 /** Flag to control whether container page dates should be computed. */ 153 protected boolean m_computeContainerPageDates; 154 155 /** The list of detail page info beans. */ 156 protected List<CmsDetailPageInfo> m_detailPageInfos = new ArrayList<CmsDetailPageInfo>(); 157 158 /** A map from type names to lists of potential detail resources of that type. */ 159 protected Map<String, List<CmsResource>> m_detailResources = new HashMap<String, List<CmsResource>>(); 160 161 /** A multimap from detail page root paths to corresponding types. */ 162 protected Multimap<String, String> m_detailTypesByPage = ArrayListMultimap.create(); 163 164 /** A CMS context with guest privileges. */ 165 protected CmsObject m_guestCms; 166 167 /** The include/exclude configuration used for choosing pages for the XML sitemap. */ 168 protected CmsPathIncludeExcludeSet m_includeExcludeSet = new CmsPathIncludeExcludeSet(); 169 170 /** A map from structure ids to page aliases below the base folder which point to the given structure id. */ 171 protected Multimap<CmsUUID, CmsAlias> m_pageAliasesBelowBaseFolderByStructureId = ArrayListMultimap.create(); 172 173 /** The map used for storing the results, with URLs as keys. */ 174 protected Map<String, ResultEntry> m_resultMap = new LinkedHashMap<String, ResultEntry>(); 175 176 /** A guest user CMS object with the site root of the base folder. */ 177 protected CmsObject m_siteGuestCms; 178 179 /** The site root of the base folder. */ 180 protected String m_siteRoot; 181 182 /** A link to the site root. */ 183 protected String m_siteRootLink; 184 185 /** Configured replacement server URL. */ 186 private String m_serverUrl; 187 188 /** 189 * Creates a new sitemap generator instance.<p> 190 * 191 * @param folderRootPath the root folder for the XML sitemap to generate 192 * 193 * @throws CmsException if something goes wrong 194 */ 195 public CmsXmlSitemapGenerator(String folderRootPath) 196 throws CmsException { 197 198 m_baseFolderRootPath = CmsFileUtil.removeTrailingSeparator(folderRootPath); 199 m_guestCms = OpenCms.initCmsObject(OpenCms.getDefaultUsers().getUserGuest()); 200 m_siteGuestCms = OpenCms.initCmsObject(m_guestCms); 201 CmsSite site = OpenCms.getSiteManager().getSiteForRootPath(CmsStringUtil.joinPaths(folderRootPath, "/")); 202 m_siteRoot = site.getSiteRoot(); 203 204 m_siteGuestCms.getRequestContext().setSiteRoot(m_siteRoot); 205 m_baseFolderSitePath = CmsStringUtil.joinPaths( 206 "/", 207 m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath)); 208 } 209 210 /** 211 * Replaces the protocol/host/port of a link with the ones from the given server URI, if it's not empty.<p> 212 * 213 * @param link the link to change 214 * @param server the server URI string 215 216 * @return the changed link 217 */ 218 public static String replaceServerUri(String link, String server) { 219 220 String serverUriStr = server; 221 222 if (CmsStringUtil.isEmptyOrWhitespaceOnly(serverUriStr)) { 223 return link; 224 } 225 try { 226 URI serverUri = new URI(serverUriStr); 227 URI linkUri = new URI(link); 228 URI result = new URI( 229 serverUri.getScheme(), 230 serverUri.getAuthority(), 231 linkUri.getPath(), 232 linkUri.getQuery(), 233 linkUri.getFragment()); 234 return result.toString(); 235 } catch (URISyntaxException e) { 236 LOG.error(e.getLocalizedMessage(), e); 237 return link; 238 } 239 240 } 241 242 /** 243 * Gets the change frequency for a sitemap entry from a list of properties.<p> 244 * 245 * If the change frequency is not defined in the properties, this method will return null.<p> 246 * 247 * @param properties the properties from which the change frequency should be obtained 248 * 249 * @return the change frequency string 250 */ 251 protected static String getChangeFrequency(List<CmsProperty> properties) { 252 253 CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_CHANGEFREQ, properties); 254 if (prop.isNullProperty()) { 255 return null; 256 } 257 String result = prop.getValue().trim(); 258 return result; 259 } 260 261 /** 262 * Gets the page priority from a list of properties.<p> 263 * 264 * If the page priority can't be found among the properties, -1 will be returned.<p> 265 * 266 * @param properties the properties of a resource 267 * 268 * @return the page priority read from the properties, or -1 269 */ 270 protected static double getPriority(List<CmsProperty> properties) { 271 272 CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_PRIORITY, properties); 273 if (prop.isNullProperty()) { 274 return -1.0; 275 } 276 try { 277 double result = Double.parseDouble(prop.getValue().trim()); 278 return result; 279 } catch (NumberFormatException e) { 280 return -1.0; 281 } 282 } 283 284 /** 285 * Removes files marked as internal from a resource list.<p> 286 * 287 * @param resources the list which should be replaced 288 */ 289 protected static void removeInternalFiles(List<CmsResource> resources) { 290 291 Iterator<CmsResource> iter = resources.iterator(); 292 while (iter.hasNext()) { 293 CmsResource resource = iter.next(); 294 if (resource.isInternal()) { 295 iter.remove(); 296 } 297 } 298 } 299 300 /** 301 * Generates a list of XML sitemap entry beans for the root folder which has been set in the constructor.<p> 302 * 303 * @return the list of XML sitemap entries 304 * 305 * @throws CmsException if something goes wrong 306 */ 307 public List<CmsXmlSitemapUrlBean> generateSitemapBeans() throws CmsException { 308 309 String baseSitePath = m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath); 310 initializeFileData(baseSitePath); 311 for (CmsResource resource : getDirectPages()) { 312 if (CmsResourceTypeHtmlRedirect.isRedirect(resource)) { 313 continue; 314 } 315 String sitePath = m_siteGuestCms.getSitePath(resource); 316 List<CmsProperty> propertyList = m_siteGuestCms.readPropertyObjects(resource, true); 317 String onlineLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, sitePath); 318 boolean isContainerPage = CmsResourceTypeXmlContainerPage.isContainerPage(resource); 319 long dateModified = resource.getDateLastModified(); 320 if (isContainerPage) { 321 if (m_computeContainerPageDates) { 322 dateModified = computeContainerPageModificationDate(resource); 323 } else { 324 dateModified = -1; 325 } 326 } 327 CmsXmlSitemapUrlBean urlBean = new CmsXmlSitemapUrlBean( 328 replaceServerUri(onlineLink), 329 dateModified, 330 getChangeFrequency(propertyList), 331 getPriority(propertyList)); 332 urlBean.setOriginalResource(resource); 333 boolean isDefaultDetailPage = isDefaultDetailPage(resource); 334 List<I_CmsResourceType> types = getDetailTypesForPage(resource); 335 if (isDefaultDetailPage) { // default detail page 336 if (!excludeDetailPage(resource)) { 337 addResult(urlBean, 3); 338 } 339 } else if (types.isEmpty()) { // not a detail page 340 addResult(urlBean, 3); 341 } else { // typed detail page 342 if (!excludeDetailPage(resource)) { 343 addResult(urlBean, 3); 344 } 345 Locale locale = getLocale(resource, propertyList); 346 addDetailLinks(resource, locale, types); 347 } 348 } 349 350 for (CmsUUID aliasStructureId : m_pageAliasesBelowBaseFolderByStructureId.keySet()) { 351 addAliasLinks(aliasStructureId); 352 } 353 354 List<CmsXmlSitemapUrlBean> result = new ArrayList<CmsXmlSitemapUrlBean>(); 355 for (ResultEntry resultEntry : m_resultMap.values()) { 356 result.add(resultEntry.getUrlBean()); 357 } 358 return result; 359 } 360 361 /** 362 * Gets the include/exclude configuration of this XML sitemap generator.<p> 363 * 364 * @return the include/exclude configuration 365 */ 366 public CmsPathIncludeExcludeSet getIncludeExcludeSet() { 367 368 return m_includeExcludeSet; 369 } 370 371 /** 372 * Generates a sitemap and formats it as a string.<p> 373 * 374 * @return the sitemap XML data 375 * 376 * @throws CmsException if something goes wrong 377 */ 378 public String renderSitemap() throws CmsException { 379 380 StringBuffer buffer = new StringBuffer(); 381 List<CmsXmlSitemapUrlBean> urlBeans = generateSitemapBeans(); 382 buffer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); 383 buffer.append(getUrlSetOpenTag() + "\n"); 384 for (CmsXmlSitemapUrlBean bean : urlBeans) { 385 buffer.append(getXmlForEntry(bean)); 386 buffer.append("\n"); 387 } 388 buffer.append("</urlset>"); 389 return buffer.toString(); 390 } 391 392 /** 393 * Enables or disables computation of container page dates.<p> 394 * 395 * @param computeContainerPageDates the new value 396 */ 397 public void setComputeContainerPageDates(boolean computeContainerPageDates) { 398 399 m_computeContainerPageDates = computeContainerPageDates; 400 } 401 402 /** 403 * Sets the replacement server URL.<p> 404 * 405 * The replacement server URL will replace the scheme/host/port from the URLs returned by getOnlineLink. 406 * 407 * @param serverUrl the server URL 408 */ 409 public void setServerUrl(String serverUrl) { 410 411 m_serverUrl = serverUrl; 412 } 413 414 /** 415 * Adds the detail page links for a given page to the results.<p> 416 * 417 * @param containerPage the container page resource 418 * @param locale the locale of the container page 419 * 420 * @throws CmsException if something goes wrong 421 */ 422 protected void addDetailLinks(CmsResource containerPage, Locale locale) throws CmsException { 423 424 List<I_CmsResourceType> types = getDetailTypesForPage(containerPage); 425 addDetailLinks(containerPage, locale, types); 426 } 427 428 /** 429 * Adds an URL bean to the internal map of results, but only if there is no existing entry with higher internal priority 430 * than the priority given as an argument.<p> 431 * 432 * @param result the result URL bean to add 433 * 434 * @param resultPriority the internal priority to use for updating the map of results 435 */ 436 protected void addResult(CmsXmlSitemapUrlBean result, int resultPriority) { 437 438 String url = CmsFileUtil.removeTrailingSeparator(result.getUrl()); 439 boolean writeEntry = true; 440 if (m_resultMap.containsKey(url)) { 441 LOG.warn("Encountered duplicate URL with while generating sitemap: " + result.getUrl()); 442 ResultEntry entry = m_resultMap.get(url); 443 writeEntry = entry.getPriority() <= resultPriority; 444 } 445 if (writeEntry) { 446 m_resultMap.put(url, new ResultEntry(result, resultPriority)); 447 } 448 } 449 450 /** 451 * Computes the container the container page modification date from its referenced contents.<p> 452 * 453 * @param containerPage the container page 454 * 455 * @return the computed modification date 456 * 457 * @throws CmsException if something goes wrong 458 */ 459 protected long computeContainerPageModificationDate(CmsResource containerPage) throws CmsException { 460 461 CmsRelationFilter filter = CmsRelationFilter.relationsFromStructureId( 462 containerPage.getStructureId()).filterType(CmsRelationType.XML_STRONG); 463 List<CmsRelation> relations = m_guestCms.readRelations(filter); 464 long result = containerPage.getDateLastModified(); 465 for (CmsRelation relation : relations) { 466 try { 467 CmsResource target = relation.getTarget( 468 m_guestCms, 469 CmsResourceFilter.DEFAULT_FILES.addRequireVisible()); 470 long targetDate = target.getDateLastModified(); 471 if (targetDate > result) { 472 result = targetDate; 473 } 474 } catch (CmsException e) { 475 LOG.warn( 476 "Could not get relation target for relation " 477 + relation.toString() 478 + " | " 479 + e.getLocalizedMessage(), 480 e); 481 } 482 } 483 484 return result; 485 } 486 487 /** 488 * Returns whether to exclude the given detail page. 489 * @param detailPage the detail page 490 * @return whether to exclude the given detail page 491 */ 492 protected boolean excludeDetailPage(CmsResource detailPage) { 493 494 CmsADEConfigData adeConfigData = OpenCms.getADEManager().lookupConfigurationWithCache( 495 m_guestCms, 496 detailPage.getRootPath()); 497 String exclude = adeConfigData.getAttribute(ATTR_DETAIL_SETTINGS_PAGE_EXCLUDE, null); 498 return Boolean.valueOf(exclude); 499 } 500 501 /** 502 * Gets the detail link for a given container page and detail content.<p> 503 * 504 * Note: The actual container page used for the result link is not necessarily the container page passed 505 * in as parameter - the default detail page in the sitemap containing the page is used. 506 * 507 * @param pageRes the container page 508 * @param detailRes the detail content 509 * @param locale the locale for which we want the link 510 * 511 * @return the detail page link 512 */ 513 protected String getDetailLink(CmsResource pageRes, CmsResource detailRes, Locale locale) { 514 515 String pageSitePath = m_siteGuestCms.getSitePath(pageRes); 516 String detailSitePath = m_siteGuestCms.getSitePath(detailRes); 517 CmsRequestContext requestContext = m_siteGuestCms.getRequestContext(); 518 String originalUri = requestContext.getUri(); 519 Locale originalLocale = requestContext.getLocale(); 520 try { 521 requestContext.setUri(pageSitePath); 522 requestContext.setLocale(locale); 523 return OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, detailSitePath, true); 524 } finally { 525 requestContext.setUri(originalUri); 526 requestContext.setLocale(originalLocale); 527 } 528 } 529 530 /** 531 * Gets the types for which a given resource is configured as a detail page.<p> 532 * 533 * @param resource a resource for which we want to find the detail page types 534 * 535 * @return the list of resource types for which the given page is configured as a detail page 536 */ 537 protected List<I_CmsResourceType> getDetailTypesForPage(CmsResource resource) { 538 539 Collection<String> typesForPage = m_detailTypesByPage.get(resource.getRootPath()); 540 String parentPath = CmsFileUtil.removeTrailingSeparator(CmsResource.getParentFolder(resource.getRootPath())); 541 Collection<String> typesForFolder = m_detailTypesByPage.get(parentPath); 542 Set<String> allTypes = new HashSet<String>(); 543 allTypes.addAll(typesForPage); 544 allTypes.addAll(typesForFolder); 545 List<I_CmsResourceType> resTypes = new ArrayList<I_CmsResourceType>(); 546 CmsResourceManager resMan = OpenCms.getResourceManager(); 547 for (String typeName : allTypes) { 548 if (typeName.startsWith(CmsDetailPageInfo.FUNCTION_PREFIX)) { 549 continue; 550 } 551 try { 552 I_CmsResourceType resType = resMan.getResourceType(typeName); 553 resTypes.add(resType); 554 } catch (CmsLoaderException e) { 555 LOG.warn("Invalid resource type name" + typeName + "! " + e.getLocalizedMessage(), e); 556 } 557 } 558 return resTypes; 559 } 560 561 /** 562 * Gets the list of pages which should be directly added to the XML sitemap.<p> 563 * 564 * @return the list of resources which should be directly added to the XML sitemap 565 * 566 * @throws CmsException if something goes wrong 567 */ 568 protected List<CmsResource> getDirectPages() throws CmsException { 569 570 List<CmsResource> result = new ArrayList<CmsResource>(); 571 result.addAll(getNavigationPages()); 572 Set<String> includeRoots = m_includeExcludeSet.getIncludeRoots(); 573 for (String includeRoot : includeRoots) { 574 try { 575 CmsResource resource = m_guestCms.readResource(includeRoot); 576 if (resource.isFile()) { 577 result.add(resource); 578 } else { 579 List<CmsResource> subtreeFiles = m_guestCms.readResources( 580 includeRoot, 581 CmsResourceFilter.DEFAULT_FILES, 582 true); 583 result.addAll(subtreeFiles); 584 } 585 } catch (CmsVfsResourceNotFoundException e) { 586 LOG.warn("Could not read include resource: " + includeRoot); 587 } 588 } 589 Iterator<CmsResource> filterIter = result.iterator(); 590 while (filterIter.hasNext()) { 591 CmsResource currentResource = filterIter.next(); 592 if (currentResource.isInternal() || m_includeExcludeSet.isExcluded(currentResource.getRootPath())) { 593 filterIter.remove(); 594 } 595 } 596 return result; 597 } 598 599 /** 600 * Writes the inner node content for an url element to a buffer.<p> 601 * 602 * @param entry the entry for which the content should be written 603 * @return the inner XML 604 */ 605 protected String getInnerXmlForEntry(CmsXmlSitemapUrlBean entry) { 606 607 StringBuffer buffer = new StringBuffer(); 608 entry.writeElement(buffer, "loc", entry.getUrl()); 609 entry.writeLastmod(buffer); 610 entry.writeChangefreq(buffer); 611 entry.writePriority(buffer); 612 return buffer.toString(); 613 } 614 615 /** 616 * Gets the list of pages from the navigation which should be directly added to the XML sitemap.<p> 617 * 618 * @return the list of pages to add to the XML sitemap 619 */ 620 protected List<CmsResource> getNavigationPages() { 621 622 List<CmsResource> result = new ArrayList<CmsResource>(); 623 CmsJspNavBuilder navBuilder = new CmsJspNavBuilder(m_siteGuestCms); 624 try { 625 CmsResource rootDefaultFile = m_siteGuestCms.readDefaultFile( 626 m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath), 627 CmsResourceFilter.DEFAULT); 628 if (rootDefaultFile != null) { 629 result.add(rootDefaultFile); 630 } 631 } catch (Exception e) { 632 LOG.info(e.getLocalizedMessage(), e); 633 } 634 List<CmsJspNavElement> navElements = navBuilder.getSiteNavigation( 635 m_baseFolderSitePath, 636 CmsJspNavBuilder.Visibility.includeHidden, 637 -1); 638 for (CmsJspNavElement navElement : navElements) { 639 CmsResource navResource = navElement.getResource(); 640 if (navResource.isFolder()) { 641 try { 642 CmsResource defaultFile = m_guestCms.readDefaultFile(navResource, CmsResourceFilter.DEFAULT_FILES); 643 if (defaultFile != null) { 644 result.add(defaultFile); 645 } else { 646 LOG.warn("Could not get default file for " + navResource.getRootPath()); 647 } 648 } catch (CmsException e) { 649 LOG.warn("Could not get default file for " + navResource.getRootPath()); 650 } 651 } else { 652 result.add(navResource); 653 } 654 } 655 return result; 656 } 657 658 /** 659 * Gets the opening tag for the urlset element (can be overridden to add e.g. more namespaces.<p> 660 * 661 * @return the opening tag 662 */ 663 protected String getUrlSetOpenTag() { 664 665 return "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"; 666 } 667 668 /** 669 * Writes the XML for an URL entry to a buffer.<p> 670 * 671 * @param entry the XML sitemap entry bean 672 * 673 * @return an XML representation of this bean 674 */ 675 protected String getXmlForEntry(CmsXmlSitemapUrlBean entry) { 676 677 StringBuffer buffer = new StringBuffer(); 678 buffer.append("<url>"); 679 buffer.append(getInnerXmlForEntry(entry)); 680 buffer.append("</url>"); 681 return buffer.toString(); 682 } 683 684 /** 685 * Checks whether the given alias is below the base folder.<p> 686 * 687 * @param alias the alias to check 688 * 689 * @return true if the alias is below the base folder 690 */ 691 protected boolean isAliasBelowBaseFolder(CmsAlias alias) { 692 693 boolean isBelowBaseFolder = CmsStringUtil.isPrefixPath(m_baseFolderSitePath, alias.getAliasPath()); 694 return isBelowBaseFolder; 695 } 696 697 /** 698 * Returns whether the given page is a default detail page. 699 * @param resource the page resource 700 * @return whether the given page is a default detail page 701 */ 702 protected boolean isDefaultDetailPage(CmsResource resource) { 703 704 Collection<String> typesForPage = m_detailTypesByPage.get(resource.getRootPath()); 705 String parentPath = CmsFileUtil.removeTrailingSeparator(CmsResource.getParentFolder(resource.getRootPath())); 706 Collection<String> typesForFolder = m_detailTypesByPage.get(parentPath); 707 Set<String> allTypes = new HashSet<String>(); 708 allTypes.addAll(typesForPage); 709 allTypes.addAll(typesForFolder); 710 for (String typeName : allTypes) { 711 if (typeName.equals(CmsADEManager.DEFAULT_DETAILPAGE_TYPE)) { 712 return true; 713 } 714 } 715 return false; 716 } 717 718 /** 719 * Checks whether the page/detail content combination is a valid detail page.<p> 720 * 721 * @param page the container page 722 * @param locale the locale 723 * @param detailRes the detail content resource 724 * 725 * @return true if this is a valid detail page combination 726 */ 727 protected boolean isValidDetailPageCombination(CmsResource page, Locale locale, CmsResource detailRes) { 728 729 return OpenCms.getADEManager().getDetailPageHandler().isValidDetailPage(m_guestCms, page, detailRes); 730 } 731 732 /** 733 * Replaces the protocol/host/port of a link with the ones from the configured server URI, if it's not empty.<p> 734 * 735 * @param link the link to change 736 * 737 * @return the changed link 738 */ 739 protected String replaceServerUri(String link) { 740 741 return replaceServerUri(link, m_serverUrl); 742 } 743 744 /** 745 * Adds the alias links for a given structure id to the results.<p> 746 * 747 * @param aliasStructureId the alias target structure id 748 */ 749 private void addAliasLinks(CmsUUID aliasStructureId) { 750 751 try { 752 CmsResource aliasTarget = m_guestCms.readResource(aliasStructureId); 753 List<CmsProperty> properties = m_guestCms.readPropertyObjects(aliasTarget, true); 754 double priority = getPriority(properties); 755 String changeFrequency = getChangeFrequency(properties); 756 Collection<CmsAlias> aliases = m_pageAliasesBelowBaseFolderByStructureId.get(aliasStructureId); 757 for (CmsAlias alias : aliases) { 758 String aliasLink = (m_siteRootLink + "/" + alias.getAliasPath()).replaceAll("(?<!:)//+", "/"); 759 CmsXmlSitemapUrlBean aliasUrlBean = new CmsXmlSitemapUrlBean( 760 replaceServerUri(aliasLink), 761 -1, 762 changeFrequency, 763 priority); 764 aliasUrlBean.setOriginalResource(aliasTarget); 765 addResult(aliasUrlBean, 1); 766 } 767 } catch (CmsException e) { 768 LOG.error(e.getLocalizedMessage(), e); 769 } 770 } 771 772 /** 773 * Adds the detail page links for a given page to the results.<p> 774 * 775 * @param containerPage the container page resource 776 * @param locale the locale of the container page 777 * @param types the detail types 778 * 779 * @throws CmsException if something goes wrong 780 */ 781 private void addDetailLinks(CmsResource containerPage, Locale locale, List<I_CmsResourceType> types) 782 throws CmsException { 783 784 for (I_CmsResourceType type : types) { 785 List<CmsResource> resourcesForType = getDetailResources(type); 786 for (CmsResource detailRes : resourcesForType) { 787 if (!isValidDetailPageCombination(containerPage, locale, detailRes)) { 788 continue; 789 } 790 List<CmsProperty> detailProps = m_guestCms.readPropertyObjects(detailRes, true); 791 String detailLink = getDetailLink(containerPage, detailRes, locale); 792 String detailLinkRootPath = detailLink; 793 try { 794 detailLinkRootPath = (new URI(detailLink)).getPath(); 795 detailLinkRootPath = CmsLinkManager.removeOpenCmsContext(detailLinkRootPath); 796 detailLinkRootPath = m_siteGuestCms.addSiteRoot(detailLinkRootPath); 797 } catch (URISyntaxException e) { 798 // should not happen 799 } 800 if (!m_includeExcludeSet.isExcluded(detailLinkRootPath)) { 801 CmsXmlSitemapUrlBean detailUrlBean = new CmsXmlSitemapUrlBean( 802 replaceServerUri(detailLink), 803 detailRes.getDateLastModified(), 804 getChangeFrequency(detailProps), 805 getPriority(detailProps)); 806 detailUrlBean.setOriginalResource(detailRes); 807 detailUrlBean.setDetailPageResource(containerPage); 808 addResult(detailUrlBean, 2); 809 } 810 } 811 } 812 } 813 814 /** 815 * Gets all resources from the folder tree beneath the base folder or the shared folder which have a given type.<p> 816 * 817 * @param type the type to filter by 818 * 819 * @return the list of resources with the given type 820 * 821 * @throws CmsException if something goes wrong 822 */ 823 private List<CmsResource> getDetailResources(I_CmsResourceType type) throws CmsException { 824 825 String typeName = type.getTypeName(); 826 if (!m_detailResources.containsKey(typeName)) { 827 List<CmsResource> result = new ArrayList<CmsResource>(); 828 CmsResourceFilter filter = CmsResourceFilter.DEFAULT_FILES.addRequireType(type); 829 List<CmsResource> siteFiles = m_guestCms.readResources(m_siteRoot, filter, true); 830 result.addAll(siteFiles); 831 String shared = CmsFileUtil.removeTrailingSeparator(OpenCms.getSiteManager().getSharedFolder()); 832 if (shared != null) { 833 List<CmsResource> sharedFiles = m_guestCms.readResources(shared, filter, true); 834 result.addAll(sharedFiles); 835 } 836 m_detailResources.put(typeName, result); 837 } 838 return m_detailResources.get(typeName); 839 } 840 841 /** 842 * Gets the locale to use for the given resource.<p> 843 * 844 * @param resource the resource 845 * @param propertyList the properties of the resource 846 * 847 * @return the locale to use for the given resource 848 */ 849 private Locale getLocale(CmsResource resource, List<CmsProperty> propertyList) { 850 851 return OpenCms.getLocaleManager().getDefaultLocale(m_guestCms, m_guestCms.getSitePath(resource)); 852 } 853 854 /** 855 * Reads the data necessary for building the sitemap from the VFS and initializes the internal data structures.<p> 856 * 857 * @param baseSitePath the base site path 858 * 859 * @throws CmsException if something goes wrong 860 */ 861 private void initializeFileData(String baseSitePath) throws CmsException { 862 863 m_resultMap.clear(); 864 m_siteRootLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, "/"); 865 m_siteRootLink = CmsFileUtil.removeTrailingSeparator(m_siteRootLink); 866 m_detailPageInfos = OpenCms.getADEManager().getAllDetailPages(m_guestCms); 867 for (CmsDetailPageInfo detailPageInfo : m_detailPageInfos) { 868 String type = detailPageInfo.getType(); 869 String path = detailPageInfo.getUri(); 870 path = CmsFileUtil.removeTrailingSeparator(path); 871 m_detailTypesByPage.put(path, type); 872 } 873 List<CmsAlias> siteAliases = OpenCms.getAliasManager().getAliasesForSite( 874 m_siteGuestCms, 875 m_siteGuestCms.getRequestContext().getSiteRoot()); 876 for (CmsAlias alias : siteAliases) { 877 if (isAliasBelowBaseFolder(alias) && (alias.getMode() == CmsAliasMode.page)) { 878 CmsUUID aliasId = alias.getStructureId(); 879 m_pageAliasesBelowBaseFolderByStructureId.put(aliasId, alias); 880 } 881 } 882 883 } 884}