/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.ade.containerpage;

import org.opencms.ade.configuration.CmsADEConfigData;
import org.opencms.ade.containerpage.shared.CmsFormatterConfig;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProperty;
import org.opencms.file.CmsResource;
import org.opencms.file.CmsResourceFilter;
import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.util.CmsUUID;
import org.opencms.xml.containerpage.CmsContainerBean;
import org.opencms.xml.containerpage.CmsContainerElementBean;
import org.opencms.xml.containerpage.CmsContainerPageBean;
import org.opencms.xml.containerpage.CmsGroupContainerBean;
import org.opencms.xml.containerpage.CmsXmlContainerPage;
import org.opencms.xml.containerpage.CmsXmlContainerPageFactory;
import org.opencms.xml.containerpage.CmsXmlGroupContainer;
import org.opencms.xml.containerpage.CmsXmlGroupContainerFactory;
import org.opencms.xml.containerpage.I_CmsFormatterBean;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

import org.apache.commons.logging.Log;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

/**
 * Analyzes content type and formatter usage in a site / folder.
 *
 * <p>The analyzer reads the container pages, model groups and element groups below a path,
 * records which content types are used with which formatters on which pages, and collects
 * the result in a serializable {@link State} object.
 */
public class CmsTypeAnalyzer {

    /**
     * Bean for formatter information.
     */
    public static class FormatterBean implements Serializable {

        /** Serial version id. */
        private static final long serialVersionUID = 1L;

        /** The id. */
        private CmsUUID m_id;

        /** The key. */
        private String m_key;

        /** The path. */
        private String m_path;

        /** The nice name. */
        private String m_niceName;

        /**
         * Instantiates a new formatter bean.
         *
         * @param id the id
         * @param path the path
         * @param key the key
         * @param niceName the nice name
         */
        public FormatterBean(CmsUUID id, String path, String key, String niceName) {

            super();
            m_id = id;
            m_path = path;
            m_key = key;
            m_niceName = niceName;
        }

        /**
         * Gets the id.
         *
         * @return the id
         */
        public CmsUUID getId() {

            return m_id;
        }

        /**
         * Gets the key.
         *
         * @return the key
         */
        public String getKey() {

            return m_key;
        }

        /**
         * Gets the nice name.
         *
         * @return the nice name
         */
        public String getNiceName() {

            return m_niceName;
        }

        /**
         * Gets the path.
         *
         * @return the path
         */
        public String getPath() {

            return m_path;
        }

    }

    /**
     * Data for a single resource.
     */
    public static class ResourceBean implements Serializable {

        /** Serial version id. */
        private static final long serialVersionUID = 1L;

        /** The id. */
        private CmsUUID m_id;

        /** The path. */
        private String m_path;

        /**
         * Instantiates a new resource bean.
         *
         * @param id the id
         * @param path the path
         */
        public ResourceBean(CmsUUID id, String path) {

            super();
            m_id = id;
            m_path = path;
        }

        /**
         * Gets the id.
         *
         * @return the id
         */
        public CmsUUID getId() {

            return m_id;
        }

        /**
         * Gets the path.
         *
         * @return the path
         */
        public String getPath() {

            return m_path;
        }
    }

    /**
     * Represents all data collected by the CmsTypeAnalyzer class.
     */
    public static class State implements Serializable {

        /** Serial version id. */
        private static final long serialVersionUID = 1L;

        /** The type usage: type name -> (formatter id -> structure ids of the resources using it). */
        protected Map<String, Multimap<CmsUUID, CmsUUID>> m_typeUsage = new HashMap<>();

        /** The formatters, keyed by formatter id. */
        protected Map<CmsUUID, FormatterBean> m_formatters = new HashMap<>();

        /** The processed pages / groups, keyed by structure id. */
        protected Map<CmsUUID, ResourceBean> m_pages = new HashMap<>();

        /** The types, keyed by type name. */
        protected Map<String, TypeBean> m_types = new LinkedHashMap<>();

        /** The path. */
        protected String m_path;

        /** The site root. */
        protected String m_siteRoot;

        /** The set of containers to exclude. */
        protected Set<String> m_excludedContainers;

        /** True if detail only contents are skipped. */
        public boolean m_skipDetailOnly;

        /** The map of (legacy) function usages: function root path -> site paths of the resources using it. */
        private Map<String, Set<String>> m_functionUsage = new HashMap<>();

        /** The template regex. */
        public String m_templateRegex;

        /**
         * Gets the serial version id of this class.
         *
         * @return the serial version id
         */
        public static long getSerialversionuid() {

            return serialVersionUID;
        }

        /**
         * Gets the container names to exclude.
         *
         * @return the top-level containers to exclude
         */
        public Set<String> getExcludedContainers() {

            return m_excludedContainers;
        }

        /**
         * Gets the formatters.
         *
         * @return the formatters
         */
        public Map<CmsUUID, FormatterBean> getFormatters() {

            return m_formatters;
        }

        /**
         * Gets the (legacy) dynamic function usages.
         *
         * @return the legacy function usages
         */
        public Map<String, Set<String>> getFunctionUsages() {

            return m_functionUsage;
        }

        /**
         * Gets the pages.
         *
         * @return the pages
         */
        public Map<CmsUUID, ResourceBean> getPages() {

            return m_pages;
        }

        /**
         * Gets the sorted, distinct paths of the pages on which the given type is used with the given formatter.
         *
         * <p>Returns an empty list if no usage has been recorded for the type. Page ids for which
         * no resource bean is known are skipped.
         *
         * @param type the type
         * @param formatter the formatter
         * @return the pages
         */
        public List<String> getPages(String type, CmsUUID formatter) {

            Multimap<CmsUUID, CmsUUID> formatterUsages = getTypeUsage().get(type);
            if (formatterUsages == null) {
                // no usage recorded for this type at all
                return Collections.emptyList();
            }
            Collection<CmsUUID> pageIds = formatterUsages.get(formatter);
            return pageIds.stream().map(id -> m_pages.get(id)).filter(page -> page != null).map(
                page -> page.getPath()).distinct().sorted().collect(Collectors.toList());
        }

        /**
         * Gets the path.
         *
         * @return the path
         */
        public String getPath() {

            return m_path;
        }

        /**
         * Gets the site root.
         *
         * @return the site root
         */
        public String getSiteRoot() {

            return m_siteRoot;
        }

        /**
         * Gets the formatters used for the given type, sorted by descending number of usages.
         *
         * @param type the type
         * @return the sorted formatters, or an empty list if no usage is recorded for the type
         */
        public List<FormatterBean> getSortedFormatters(String type) {

            Multimap<CmsUUID, CmsUUID> formatterUsages = m_typeUsage.get(type);
            if (formatterUsages == null) {
                return Collections.emptyList();
            }
            return formatterUsages.keySet().stream().sorted(
                (f1, f2) -> Integer.compare(formatterUsages.get(f2).size(), formatterUsages.get(f1).size())).map(
                    id -> m_formatters.get(id)).collect(Collectors.toList());
        }

        /**
         * Gets the template regex.
         *
         * @return the template regex
         */
        public String getTemplateRegex() {

            return m_templateRegex;
        }

        /**
         * Gets the types.
         *
         * @return the types
         */
        public Map<String, TypeBean> getTypes() {

            return m_types;
        }

        /**
         * Gets the type usage.
         *
         * @return the type usage
         */
        public Map<String, Multimap<CmsUUID, CmsUUID>> getTypeUsage() {

            return m_typeUsage;
        }

        /**
         * Checks if detail only contents are skipped.
         *
         * @return true if detail only contents are skipped
         */
        public boolean isSkipDetailOnly() {

            return m_skipDetailOnly;
        }
    }

    /**
     * Data for a single content type.
     */
    public static class TypeBean implements Serializable {

        /** Serial version id. */
        private static final long serialVersionUID = 1L;

        /** The name. */
        private String m_name;

        /** The nice name. */
        private String m_niceName;

        /** The count. */
        private int m_count;

        /** The usage count. */
        private int m_usageCount;

        /**
         * Instantiates a new type bean.
         *
         * @param name the name
         * @param niceName the nice name
         * @param count the count
         */
        public TypeBean(String name, String niceName, int count) {

            super();
            m_name = name;
            m_niceName = niceName;
            m_count = count;
        }

        /**
         * Gets the count.
         *
         * @return the count
         */
        public int getCount() {

            return m_count;
        }

        /**
         * Gets the name.
         *
         * @return the name
         */
        public String getName() {

            return m_name;
        }

        /**
         * Gets the nice name.
         *
         * @return the nice name
         */
        public String getNiceName() {

            return m_niceName;
        }

        /**
         * Gets the usage count.
         *
         * @return the usage count
         */
        public int getUsageCount() {

            return m_usageCount;
        }

        /**
         * Sets the count.
         *
         * @param count the new count
         */
        public void setCount(int count) {

            m_count = count;
        }

        /**
         * Sets the usage count.
         *
         * @param referenceCount the new usage count
         */
        public void setUsageCount(int referenceCount) {

            m_usageCount = referenceCount;
        }
    }

    /** The logger for this class. */
    private static final Log LOG = CmsLog.getLog(CmsTypeAnalyzer.class);

    /** The id under which usages without a resolvable formatter are recorded. */
    public static final CmsUUID UNKNOWN_FORMATTER = CmsUUID.getNullUUID();

    /** The collected data. */
    private State m_state = new State();

    /** The locale used for localized labels. */
    private Locale m_locale;

    /** The CMS context. */
    private CmsObject m_cms;

    /** The compiled template regex. */
    private Pattern m_templatePattern;

    /**
     * Creates a new instance.
     *
     * @param cms the CMS context
     * @param siteRoot the site root
     * @param path the site path to analyze
     * @param skipDetailOnly true if detail only pages should be skipped
     * @param excludedContainers the names of the containers to exclude; null is treated as an empty set
     * @param templateRegex the regular expression the template of a page has to match; must not be null
     * @throws CmsException if something goes wrong
     */
    public CmsTypeAnalyzer(
        CmsObject cms,
        String siteRoot,
        String path,
        boolean skipDetailOnly,
        Set<String> excludedContainers,
        String templateRegex)
    throws CmsException {

        m_cms = OpenCms.initCmsObject(cms);
        m_cms.getRequestContext().setSiteRoot(siteRoot);
        m_state.m_path = path;
        m_state.m_siteRoot = siteRoot;
        m_state.m_skipDetailOnly = skipDetailOnly;
        // processPage calls contains() on this set, so it must never be null
        m_state.m_excludedContainers = excludedContainers != null ? excludedContainers : new HashSet<String>();
        m_state.m_templateRegex = templateRegex;
        m_templatePattern = Pattern.compile(templateRegex);
        m_locale = OpenCms.getWorkplaceManager().getWorkplaceLocale(cms);
    }

    /**
     * Deserializes the state from a byte array.
     *
     * <p>The data is expected in the format produced by {@link #writeState(State)}.
     * NOTE(review): this uses Java native deserialization; it should only be fed data from a
     * trusted source - confirm against the callers.
     *
     * @param data the data
     * @return the deserialized state
     * @throws Exception if something goes wrong
     */
    public static State readState(byte[] data) throws Exception {

        ByteArrayInputStream bais = new ByteArrayInputStream(data);
        try (ObjectInputStream stream = new ObjectInputStream(new InflaterInputStream(bais))) {
            return (State)stream.readObject();
        }

    }

    /**
     * Runs the type analysis and returns the state object with all the collected data.
     *
     * @param cms the CMS context
     * @param path the path
     * @param skipDetailOnly true if detail only pages should be skipped
     * @param excludeContainersStr a comma-separated list of container names to exclude from analysis (only direct elements); may be null
     * @param templateRegex a regular expression such that only pages whose template matches that regex should be processed
     * @return the state
     * @throws CmsException if something goes wrong
     */
    public static State run(
        CmsObject cms,
        String path,
        boolean skipDetailOnly,
        String excludeContainersStr,
        String templateRegex)
    throws CmsException {

        Set<String> excludedContainers = new HashSet<>();
        if (excludeContainersStr != null) {
            for (String token : excludeContainersStr.split(",")) {
                String containerName = token.trim();
                if (!containerName.isEmpty()) {
                    excludedContainers.add(containerName);
                }
            }
        }

        CmsTypeAnalyzer analyzer = new CmsTypeAnalyzer(
            cms,
            cms.getRequestContext().getSiteRoot(),
            path,
            skipDetailOnly,
            excludedContainers,
            templateRegex);
        return analyzer.processFolder();

    }

    /**
     * Serializes a state to a byte array.
     *
     * @param state the state
     * @return the serialized data
     * @throws IOException if something goes wrong with serialization
     */
    public static byte[] writeState(State state) throws IOException {

        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        // closing the object stream also finishes the deflater, so the buffer is complete afterwards
        try (ObjectOutputStream out = new ObjectOutputStream(new DeflaterOutputStream(baos))) {
            out.writeObject(state);
        }
        return baos.toByteArray();

    }

    /**
     * Gets the state.
     *
     * @return the state
     */
    public State getState() {

        return m_state;
    }

    /**
     * Processes all container pages, model groups and element groups below the configured path
     * and collects the type information.
     *
     * @return the state
     * @throws CmsException if reading or unmarshalling a resource fails
     */
    public State processFolder() throws CmsException {

        long start = System.currentTimeMillis();
        I_CmsResourceType pageType = OpenCms.getResourceManager().getResourceType(
            CmsResourceTypeXmlContainerPage.RESOURCE_TYPE_NAME);
        I_CmsResourceType modelGroupType = OpenCms.getResourceManager().getResourceType(
            CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME);
        I_CmsResourceType elementGroupType = OpenCms.getResourceManager().getResourceType(
            CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME);

        List<CmsResource> pages = m_cms.readResources(
            m_state.m_path,
            CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(pageType));
        for (CmsResource page : pages) {
            if (!checkTemplate(page)) {
                continue;
            }
            if (m_state.m_skipDetailOnly && page.getRootPath().contains(".detailContainers")) {
                continue;
            }
            processPage(page);
        }

        List<CmsResource> modelGroups = m_cms.readResources(
            m_state.m_path,
            CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(modelGroupType));
        for (CmsResource modelGroup : modelGroups) {
            if (checkTemplate(modelGroup)) {
                processPage(modelGroup);
            }
        }

        List<CmsResource> elementGroups = m_cms.readResources(
            m_state.m_path,
            CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(elementGroupType));
        for (CmsResource elementGroup : elementGroups) {
            if (checkTemplate(elementGroup)) {
                processElementGroup(elementGroup);
            }
        }

        for (String type : OpenCms.getADEManager().getContentTypeNames(false)) {
            addTypeInfo(type);
        }

        long end = System.currentTimeMillis();

        // the usage count of a type is the number of (formatter, page) entries recorded for it
        for (TypeBean typeBean : m_state.m_types.values()) {
            Multimap<CmsUUID, CmsUUID> usage = m_state.m_typeUsage.get(typeBean.getName());
            if (usage != null) {
                typeBean.setUsageCount(usage.size());
            }
        }

        // re-insert the types ordered by descending usage count
        LinkedHashMap<String, TypeBean> typesSorted = new LinkedHashMap<>();
        m_state.m_types.values().stream().sorted(
            (a, b) -> Integer.compare(b.getUsageCount(), a.getUsageCount())).forEach(
                type -> typesSorted.put(type.getName(), type));
        m_state.m_types = typesSorted;
        LOG.info("Processed " + pages.size() + " pages, took " + (end - start) + "ms");
        return m_state;

    }

    /**
     * Adds the entry for a specific content type usage in a page.
     *
     * @param typeName the type name
     * @param pageId the page id
     * @param formatterId the formatter id
     */
    private void addEntry(String typeName, CmsUUID pageId, CmsUUID formatterId) {

        m_state.m_typeUsage.computeIfAbsent(typeName, k -> ArrayListMultimap.create()).put(formatterId, pageId);

    }

    /**
     * Registers the given page / group resource in the state, using its site path.
     *
     * @param pageResource the page resource
     */
    private void addResource(CmsResource pageResource) {

        CmsUUID structureId = pageResource.getStructureId();
        m_state.m_pages.put(structureId, new ResourceBean(structureId, m_cms.getSitePath(pageResource)));
    }

    /**
     * Adds the type info for the given type, unless it is already present.
     *
     * <p>Errors are logged, and the type is left out of the state in that case.
     *
     * @param type the type
     */
    private void addTypeInfo(String type) {

        if (m_state.m_types.get(type) != null) {
            return;
        }
        try {
            List<CmsResource> resources = m_cms.readResources(
                m_state.m_path,
                CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(OpenCms.getResourceManager().getResourceType(type)),
                true);
            String key = OpenCms.getWorkplaceManager().getExplorerTypeSetting(type).getKey();
            String label = OpenCms.getWorkplaceManager().getMessages(m_locale).key(key);
            m_state.m_types.put(type, new TypeBean(type, label, resources.size()));
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
        }

    }

    /**
     * If the container element is a (legacy) dynamic function, add it to map of function usages.
     *
     * @param pageResource the current page
     * @param element the container element
     */
    private void checkFunction(CmsResource pageResource, CmsContainerElementBean element) {

        if (OpenCms.getResourceManager().matchResourceType("function", element.getResource().getTypeId())) {
            m_state.m_functionUsage.computeIfAbsent(element.getResource().getRootPath(), p -> new HashSet<>()).add(
                m_cms.getSitePath(pageResource));

        }
    }

    /**
     * Checks that the template property of the page matches the template regex.<p>
     *
     * A missing property value is treated as the empty string. The whole value has to match
     * the regex. If the property can not be read, the error is logged and false is returned.
     *
     * @param page the page to check
     * @return true if the template matches the template regex
     */
    private boolean checkTemplate(CmsResource page) {

        try {
            CmsProperty templateProp = m_cms.readPropertyObject(page, "template", true);
            String templateValue = templateProp.getValue();
            if (templateValue == null) {
                templateValue = "";
            }
            return m_templatePattern.matcher(templateValue).matches();
        } catch (Exception e) {
            LOG.error(e.getLocalizedMessage(), e);
            return false;
        }
    }

    /**
     * Processes an element group: records all its elements as usages with an unknown formatter.
     *
     * @param groupResource the group resource
     * @throws CmsException if something goes wrong
     */
    private void processElementGroup(CmsResource groupResource) throws CmsException {

        addResource(groupResource);

        CmsXmlGroupContainer groupXml = CmsXmlGroupContainerFactory.unmarshal(m_cms, m_cms.readFile(groupResource));
        CmsGroupContainerBean group = groupXml.getGroupContainer(m_cms);

        for (CmsContainerElementBean element : group.getElements()) {
            try {
                element.initResource(m_cms);
                checkFunction(groupResource, element);
                // make sure the formatter id used below resolves to a bean in State.getSortedFormatters
                registerUnknownFormatter();
                addEntry(element.getTypeName(), groupResource.getStructureId(), UNKNOWN_FORMATTER);
            } catch (Exception e) {
                LOG.error(e.getLocalizedMessage(), e);
            }
        }

    }

    /**
     * Processes a page: records the type and formatter of every element in its non-excluded containers.
     *
     * <p>Errors for a single element are logged, and processing continues with the next element.
     *
     * @param pageResource the page resource
     * @throws CmsException if the page can not be read or unmarshalled
     */
    private void processPage(CmsResource pageResource) throws CmsException {

        LOG.debug("processing page " + pageResource.getRootPath());

        CmsADEConfigData config = OpenCms.getADEManager().lookupConfigurationWithCache(
            m_cms,
            pageResource.getRootPath());
        addResource(pageResource);
        CmsXmlContainerPage pageXml = CmsXmlContainerPageFactory.unmarshal(m_cms, m_cms.readFile(pageResource));
        CmsContainerPageBean page = pageXml.getContainerPage(m_cms);
        for (CmsContainerBean container : page.getContainers().values()) {
            if (m_state.m_excludedContainers.contains(container.getName())) {
                continue;
            }
            for (CmsContainerElementBean element : container.getElements()) {
                try {
                    element.initResource(m_cms);
                    checkFunction(pageResource, element);
                    Map<String, String> settings = element.getIndividualSettings();
                    String formatterRef = settings.get(CmsFormatterConfig.FORMATTER_SETTINGS_KEY + container.getName());
                    if (formatterRef == null) {
                        // no setting for this container: fall back to any formatter setting (the last one found wins)
                        for (String key : settings.keySet()) {
                            if (key.startsWith(CmsFormatterConfig.FORMATTER_SETTINGS_KEY)) {
                                formatterRef = settings.get(key);
                            }
                        }
                    }
                    I_CmsFormatterBean formatter = config.findFormatter(formatterRef, /*nowarn=*/true);
                    CmsUUID formatterId = UNKNOWN_FORMATTER;
                    if (formatter != null) {
                        formatterId = new CmsUUID(formatter.getId());
                        FormatterBean bean = new FormatterBean(
                            formatterId,
                            formatter.getLocation(),
                            formatter.getKey(),
                            formatter.getNiceName(m_locale));
                        m_state.m_formatters.putIfAbsent(formatterId, bean);
                    } else {
                        registerUnknownFormatter();
                    }
                    addEntry(element.getTypeName(), pageResource.getStructureId(), formatterId);
                } catch (Exception e) {
                    // same best-effort handling as for element groups: one broken element must not abort the analysis
                    LOG.error(e.getLocalizedMessage(), e);
                }
            }
        }
    }

    /**
     * Makes sure the placeholder bean for {@link #UNKNOWN_FORMATTER} is present in the formatter map.
     */
    private void registerUnknownFormatter() {

        m_state.m_formatters.putIfAbsent(
            UNKNOWN_FORMATTER,
            new FormatterBean(UNKNOWN_FORMATTER, "unknown", null, "Unknown formatter"));
    }
}