001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.ade.containerpage;
029
030import org.opencms.ade.configuration.CmsADEConfigData;
031import org.opencms.ade.containerpage.shared.CmsFormatterConfig;
032import org.opencms.file.CmsObject;
033import org.opencms.file.CmsProperty;
034import org.opencms.file.CmsResource;
035import org.opencms.file.CmsResourceFilter;
036import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
037import org.opencms.file.types.I_CmsResourceType;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsLog;
040import org.opencms.main.OpenCms;
041import org.opencms.util.CmsUUID;
042import org.opencms.xml.containerpage.CmsContainerBean;
043import org.opencms.xml.containerpage.CmsContainerElementBean;
044import org.opencms.xml.containerpage.CmsContainerPageBean;
045import org.opencms.xml.containerpage.CmsGroupContainerBean;
046import org.opencms.xml.containerpage.CmsXmlContainerPage;
047import org.opencms.xml.containerpage.CmsXmlContainerPageFactory;
048import org.opencms.xml.containerpage.CmsXmlGroupContainer;
049import org.opencms.xml.containerpage.CmsXmlGroupContainerFactory;
050import org.opencms.xml.containerpage.I_CmsFormatterBean;
051
052import java.io.ByteArrayInputStream;
053import java.io.ByteArrayOutputStream;
054import java.io.IOException;
055import java.io.ObjectInputStream;
056import java.io.ObjectOutputStream;
057import java.io.Serializable;
058import java.util.Collection;
059import java.util.Collections;
060import java.util.HashMap;
061import java.util.HashSet;
062import java.util.LinkedHashMap;
063import java.util.List;
064import java.util.Locale;
065import java.util.Map;
066import java.util.Set;
067import java.util.regex.Pattern;
068import java.util.stream.Collectors;
069import java.util.zip.DeflaterOutputStream;
070import java.util.zip.InflaterInputStream;
071
072import org.apache.commons.logging.Log;
073
074import com.google.common.collect.ArrayListMultimap;
075import com.google.common.collect.Multimap;
076
077/**
078 * Analyzes content type and formatter usage in a site / folder.
079 */
080public class CmsTypeAnalyzer {
081
082    /**
083     * Bean for formatter information.
084     */
085    public static class FormatterBean implements Serializable {
086
087        /** Serial version id. */
088        private static final long serialVersionUID = 1L;
089
090        /** The id. */
091        private CmsUUID m_id;
092
093        /** The key. */
094        private String m_key;
095
096        /** The path. */
097        private String m_path;
098
099        /** The nice name. */
100        private String m_niceName;
101
102        /**
103         * Instantiates a new formatter bean.
104         *
105         * @param id the id
106         * @param path the path
107         * @param key the key
108         * @param niceName the nice name
109         */
110        public FormatterBean(CmsUUID id, String path, String key, String niceName) {
111
112            super();
113            m_id = id;
114            m_path = path;
115            m_key = key;
116            m_niceName = niceName;
117        }
118
119        /**
120         * Gets the id.
121         *
122         * @return the id
123         */
124        public CmsUUID getId() {
125
126            return m_id;
127        }
128
129        /**
130         * Gets the key.
131         *
132         * @return the key
133         */
134        public String getKey() {
135
136            return m_key;
137        }
138
139        /**
140         * Gets the nice name.
141         *
142         * @return the nice name
143         */
144        public String getNiceName() {
145
146            return m_niceName;
147        }
148
149        /**
150         * Gets the path.
151         *
152         * @return the path
153         */
154        public String getPath() {
155
156            return m_path;
157        }
158
159    }
160
161    /**
162     * Data for a single resource.
163     */
164    public static class ResourceBean implements Serializable {
165
166        /** Serial version id. */
167        private static final long serialVersionUID = 1L;
168
169        /** The id. */
170        private CmsUUID m_id;
171
172        /** The path. */
173        private String m_path;
174
175        /**
176         * Instantiates a new resource bean.
177         *
178         * @param id the id
179         * @param path the path
180         */
181        public ResourceBean(CmsUUID id, String path) {
182
183            super();
184            m_id = id;
185            m_path = path;
186        }
187
188        /**
189         * Gets the id.
190         *
191         * @return the id
192         */
193        public CmsUUID getId() {
194
195            return m_id;
196        }
197
198        /**
199         * Gets the path.
200         *
201         * @return the path
202         */
203        public String getPath() {
204
205            return m_path;
206        }
207    }
208
209    /**
210     * Represents all data collected by the CmsTypeAnalyzer class.
211     */
212    public static class State implements Serializable {
213
214        /** Serial version id. */
215        private static final long serialVersionUID = 1L;
216
217        /** The type usage. */
218        protected Map<String, Multimap<CmsUUID, CmsUUID>> m_typeUsage = new HashMap<>();
219
220        /** The formatters. */
221        protected Map<CmsUUID, FormatterBean> m_formatters = new HashMap<>();
222
223        /** The pages. */
224        protected Map<CmsUUID, ResourceBean> m_pages = new HashMap<>();
225
226        /** The types. */
227        protected Map<String, TypeBean> m_types = new LinkedHashMap<>();
228
229        /** The path. */
230        protected String m_path;
231
232        /** The site root. */
233        protected String m_siteRoot;
234
235        /** The set of containers to exclude. */
236        protected Set<String> m_excludedContainers;
237
238        /** True if detail only contents are skipped. */
239        public boolean m_skipDetailOnly;
240
241        /** The map of (legacy) function usages. */
242        private Map<String, Set<String>> m_functionUsage = new HashMap<>();
243
244        /** The template regex. */
245        public String m_templateRegex;
246
247        public static long getSerialversionuid() {
248
249            return serialVersionUID;
250        }
251
252        /**
253         * Gets the container names to exclude.
254         *
255         * @return the top-level containers to exclude
256         */
257        public Set<String> getExcludedContainers() {
258
259            return m_excludedContainers;
260        }
261
262        /**
263         * Gets the formatters.
264         *
265         * @return the formatters
266         */
267        public Map<CmsUUID, FormatterBean> getFormatters() {
268
269            return m_formatters;
270        }
271
272        /**
273         * Gets the (legacy) dynamic function usages.
274         *
275         * @return the legacy function usages
276         */
277        public Map<String, Set<String>> getFunctionUsages() {
278
279            return m_functionUsage;
280        }
281
282        /**
283         * Gets the pages.
284         *
285         * @return the pages
286         */
287        public Map<CmsUUID, ResourceBean> getPages() {
288
289            return m_pages;
290        }
291
292        /**
293         * Gets the pages.
294         *
295         * @param type the type
296         * @param formatter the formatter
297         * @return the pages
298         */
299        public List<String> getPages(String type, CmsUUID formatter) {
300
301            Collection<CmsUUID> usage = getTypeUsage().get(type).get(formatter);
302            return usage.stream().map(id -> m_pages.get(id).getPath()).distinct().sorted().collect(Collectors.toList());
303        }
304
305        /**
306         * Gets the path.
307         *
308         * @return the path
309         */
310        public String getPath() {
311
312            return m_path;
313        }
314
315        /**
316         * Gets the site root.
317         *
318         * @return the site root
319         */
320        public String getSiteRoot() {
321
322            return m_siteRoot;
323        }
324
325        /**
326         * Gets the sorted formatters.
327         *
328         * @param type the type
329         * @return the sorted formatters
330         */
331        public List<FormatterBean> getSortedFormatters(String type) {
332
333            if (!m_typeUsage.containsKey(type)) {
334                return Collections.emptyList();
335            }
336            Multimap<CmsUUID, CmsUUID> formatterUsages = m_typeUsage.get(type);
337            return formatterUsages.keySet().stream().sorted((f1, f2) -> {
338                return -Integer.compare(formatterUsages.get(f1).size(), formatterUsages.get(f2).size());
339            }).map(id -> m_formatters.get(id)).collect(Collectors.toList());
340        }
341
342        public String getTemplateRegex() {
343
344            return m_templateRegex;
345        }
346
347        /**
348         * Gets the types.
349         *
350         * @return the types
351         */
352        public Map<String, TypeBean> getTypes() {
353
354            return m_types;
355        }
356
357        /**
358         * Gets the type usage.
359         *
360         * @return the type usage
361         */
362        public Map<String, Multimap<CmsUUID, CmsUUID>> getTypeUsage() {
363
364            return m_typeUsage;
365        }
366
367        public boolean isSkipDetailOnly() {
368
369            return m_skipDetailOnly;
370        }
371    }
372
373    /**
374     * Data for a single content type.
375     */
376    public static class TypeBean implements Serializable {
377
378        /** Serial version id. */
379        private static final long serialVersionUID = 1L;
380
381        /** The name. */
382        private String m_name;
383
384        /** The nice name. */
385        private String m_niceName;
386
387        /** The count. */
388        private int m_count;
389
390        /** The usage count. */
391        private int m_usageCount;
392
393        /**
394         * Instantiates a new type bean.
395         *
396         * @param name the name
397         * @param niceName the nice name
398         * @param count the count
399         */
400        public TypeBean(String name, String niceName, int count) {
401
402            super();
403            m_name = name;
404            m_niceName = niceName;
405            m_count = count;
406        }
407
408        /**
409         * Gets the count.
410         *
411         * @return the count
412         */
413        public int getCount() {
414
415            return m_count;
416        }
417
418        /**
419         * Gets the name.
420         *
421         * @return the name
422         */
423        public String getName() {
424
425            return m_name;
426        }
427
428        /**
429         * Gets the nice name.
430         *
431         * @return the nice name
432         */
433        public String getNiceName() {
434
435            return m_niceName;
436        }
437
438        /**
439         * Gets the usage count.
440         *
441         * @return the usage count
442         */
443        public int getUsageCount() {
444
445            return m_usageCount;
446        }
447
448        /**
449         * Sets the count.
450         *
451         * @param count the new count
452         */
453        public void setCount(int count) {
454
455            m_count = count;
456        }
457
458        /**
459         * Sets the usage count.
460         *
461         * @param referenceCount the new usage count
462         */
463        public void setUsageCount(int referenceCount) {
464
465            m_usageCount = referenceCount;
466        }
467    }
468
469    /** The Constant LOG. */
470    private static final Log LOG = CmsLog.getLog(CmsTypeAnalyzer.class);
471
472    /** The Constant UNKNOWN_FORMATTER. */
473    public static final CmsUUID UNKNOWN_FORMATTER = CmsUUID.getNullUUID();
474
475    /** The m state. */
476    private State m_state = new State();
477
478    /** The m locale. */
479    private Locale m_locale;
480
481    /** The m cms. */
482    private CmsObject m_cms;
483
484    private Pattern m_templatePattern;
485
486    /**
487     * Creates a new instance.
488     *
489     * @param cms the CMS context
490     * @param siteRoot the site root
491     * @param path the site path to analyze
492     * @throws CmsException if something goes wrong
493     */
494    public CmsTypeAnalyzer(
495        CmsObject cms,
496        String siteRoot,
497        String path,
498        boolean skipDetailOnly,
499        Set<String> excludedContainers,
500        String templateRegex)
501    throws CmsException {
502
503        m_cms = OpenCms.initCmsObject(cms);
504        m_cms.getRequestContext().setSiteRoot(siteRoot);
505        m_state.m_path = path;
506        m_state.m_siteRoot = siteRoot;
507        m_state.m_skipDetailOnly = skipDetailOnly;
508        m_state.m_excludedContainers = excludedContainers;
509        m_state.m_templateRegex = templateRegex;
510        m_templatePattern = Pattern.compile(templateRegex);
511        m_locale = OpenCms.getWorkplaceManager().getWorkplaceLocale(cms);
512    }
513
514    /**
515     * Deserializes the state from a byte array.
516     *
517     * @param data the data
518     * @return the deserialized state
519     * @throws Exception if something goes wrong
520     */
521    public static State readState(byte[] data) throws Exception {
522
523        ByteArrayInputStream bais = new ByteArrayInputStream(data);
524        try (ObjectInputStream stream = new ObjectInputStream(new InflaterInputStream(bais))) {
525            return (State)stream.readObject();
526        }
527
528    }
529
530    /**
531     * Runs the type analysis and returns the state object with all the collected data.
532     *
533     * @param cms the CMS context
534     * @param path the path
535     * @param skipDetailOnly true if detail only pages should be skipped
536     * @param excludeContainersStr a comma-separated list of container names to exclude from analysis (only direct elements)
537     * @param templateRegex a regular expression such that only pages whose template matches that regex should be processed
538     * @return the state
539     * @throws CmsException if something goes wrong
540     */
541    public static State run(
542        CmsObject cms,
543        String path,
544        boolean skipDetailOnly,
545        String excludeContainersStr,
546        String templateRegex)
547    throws CmsException {
548
549        Set<String> excludedContainers = new HashSet<>();
550        for (String token : excludeContainersStr.split(",")) {
551            token = token.trim();
552            if ("".equals(token)) {
553                continue;
554            }
555            excludedContainers.add(token);
556        }
557
558        return (new CmsTypeAnalyzer(
559            cms,
560            cms.getRequestContext().getSiteRoot(),
561            path,
562            skipDetailOnly,
563            excludedContainers,
564            templateRegex)).processFolder();
565
566    }
567
568    /**
569     * Serializes a state to a byte array.
570     *
571     * @param state the state
572     * @return the serialized data
573     * @throws IOException if something goes wrong with serialization
574     */
575    public static byte[] writeState(State state) throws IOException {
576
577        ByteArrayOutputStream baos = new ByteArrayOutputStream();
578
579        try (ObjectOutputStream out = new ObjectOutputStream(new DeflaterOutputStream(baos))) {
580            out.writeObject(state);
581        }
582        return baos.toByteArray();
583
584    }
585
586    /**
587     * Gets the state.
588     *
589     * @return the state
590     */
591    public State getState() {
592
593        return m_state;
594    }
595
596    /**
597     * Process folder.
598     *
599     * @return the state
600     * @throws CmsException the cms exception
601     */
602    public State processFolder() throws CmsException {
603
604        long start = System.currentTimeMillis();
605        I_CmsResourceType pageType = OpenCms.getResourceManager().getResourceType(
606            CmsResourceTypeXmlContainerPage.RESOURCE_TYPE_NAME);
607        I_CmsResourceType modelGroupType = OpenCms.getResourceManager().getResourceType(
608            CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME);
609        List<CmsResource> pages = m_cms.readResources(
610            m_state.m_path,
611            CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(pageType));
612        for (CmsResource page : pages) {
613            if (!checkTemplate(page)) {
614                continue;
615            }
616            if (m_state.m_skipDetailOnly && page.getRootPath().contains(".detailContainers")) {
617                continue;
618            }
619            processPage(page);
620        }
621        List<CmsResource> modelGroups = m_cms.readResources(
622            m_state.m_path,
623            CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(modelGroupType));
624        for (CmsResource modelGroup : modelGroups) {
625            if (!checkTemplate(modelGroup)) {
626                continue;
627            }
628            processPage(modelGroup);
629        }
630
631        List<CmsResource> elementGroups = m_cms.readResources(
632            m_state.m_path,
633            CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(
634                OpenCms.getResourceManager().getResourceType(
635                    CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)));
636        for (CmsResource elementGroup : elementGroups) {
637            if (!checkTemplate(elementGroup)) {
638                continue;
639            }
640            processElementGroup(elementGroup);
641        }
642        for (String type : OpenCms.getADEManager().getContentTypeNames(false)) {
643            addTypeInfo(type);
644        }
645
646        long end = System.currentTimeMillis();
647
648        for (String type : m_state.m_types.keySet()) {
649            Multimap<CmsUUID, CmsUUID> usage = m_state.m_typeUsage.get(type);
650            if (usage != null) {
651                m_state.m_types.get(type).setUsageCount(usage.values().size());
652            }
653        }
654        LinkedHashMap<String, TypeBean> typesSorted = new LinkedHashMap<>();
655        m_state.m_types.values().stream().sorted(
656            (a, b) -> Integer.compare(b.getUsageCount(), a.getUsageCount())).forEach(
657                type -> typesSorted.put(type.getName(), type));
658        m_state.m_types = typesSorted;
659        LOG.info("Processed " + pages.size() + " pages, took " + (end - start) + "ms");
660        return m_state;
661
662    }
663
664    /**
665     * Adds the entry for a specific content type usage in a page.
666     *
667     * @param typeName the type name
668     * @param pageId the page id
669     * @param formatterId the formatter id
670     */
671    private void addEntry(String typeName, CmsUUID pageId, CmsUUID formatterId) {
672
673        m_state.m_typeUsage.computeIfAbsent(typeName, k -> ArrayListMultimap.create()).put(formatterId, pageId);
674
675    }
676
677    /**
678     * Adds the resource.
679     *
680     * @param pageResource the page resource
681     */
682    private void addResource(CmsResource pageResource) {
683
684        m_state.m_pages.put(
685            pageResource.getStructureId(),
686            new ResourceBean(pageResource.getStructureId(), m_cms.getSitePath(pageResource)));
687    }
688
689    /**
690     * Adds the type info.
691     *
692     * @param type the type
693     */
694    private void addTypeInfo(String type) {
695
696        if (m_state.m_types.get(type) != null) {
697            return;
698        }
699        int count = -1;
700        try {
701            List<CmsResource> resources = m_cms.readResources(
702                m_state.m_path,
703                CmsResourceFilter.IGNORE_EXPIRATION.addRequireType(OpenCms.getResourceManager().getResourceType(type)),
704                true);
705            count = resources.size();
706            String key = OpenCms.getWorkplaceManager().getExplorerTypeSetting(type).getKey();
707            String label = OpenCms.getWorkplaceManager().getMessages(m_locale).key(key);
708            m_state.m_types.put(type, new TypeBean(type, label, count));
709        } catch (Exception e) {
710            LOG.error(e.getLocalizedMessage(), e);
711        }
712
713    }
714
715    /**
716     * If the container element is a (legacy) dynamic function, add it to map of function usages.
717     *
718     * @param pageResource the current page
719     * @param element the container element
720     */
721    private void checkFunction(CmsResource pageResource, CmsContainerElementBean element) {
722
723        if (OpenCms.getResourceManager().matchResourceType("function", element.getResource().getTypeId())) {
724            m_state.m_functionUsage.computeIfAbsent(element.getResource().getRootPath(), p -> new HashSet<>()).add(
725                m_cms.getSitePath(pageResource));
726
727        }
728    }
729
730    /**
731     * Checks that the template property of the page matches the template regex.<p>
732     *
733     * @param page the page to check
734     * @return true if the template matches the template regex
735     */
736    private boolean checkTemplate(CmsResource page) {
737
738        try {
739            CmsProperty templateProp = m_cms.readPropertyObject(page, "template", true);
740            String templateValue = templateProp.getValue();
741            if (templateValue == null) {
742                templateValue = "";
743            }
744            return m_templatePattern.matcher(templateValue).matches();
745        } catch (Exception e) {
746            LOG.error(e.getLocalizedMessage(), e);
747            return false;
748        }
749    }
750
751    /**
752     * Process element group.
753     *
754     * @param groupResource the group resource
755     * @throws CmsException if something goes wrong
756     */
757    private void processElementGroup(CmsResource groupResource) throws CmsException {
758
759        addResource(groupResource);
760
761        CmsXmlGroupContainer groupXml = CmsXmlGroupContainerFactory.unmarshal(m_cms, m_cms.readFile(groupResource));
762        CmsGroupContainerBean group = groupXml.getGroupContainer(m_cms);
763
764        for (CmsContainerElementBean element : group.getElements()) {
765            try {
766                element.initResource(m_cms);
767                checkFunction(groupResource, element);
768                addEntry(element.getTypeName(), groupResource.getStructureId(), UNKNOWN_FORMATTER);
769            } catch (Exception e) {
770                LOG.error(e.getLocalizedMessage(), e);
771            }
772        }
773
774    }
775
776    /**
777     * Process page.
778     *
779     * @param pageResource the page resource
780     * @throws CmsException the cms exception
781     */
782    private void processPage(CmsResource pageResource) throws CmsException {
783
784        LOG.debug("processing page " + pageResource.getRootPath());
785
786        CmsADEConfigData config = OpenCms.getADEManager().lookupConfigurationWithCache(
787            m_cms,
788            pageResource.getRootPath());
789        addResource(pageResource);
790        CmsXmlContainerPage pageXml = CmsXmlContainerPageFactory.unmarshal(m_cms, m_cms.readFile(pageResource));
791        CmsContainerPageBean page = pageXml.getContainerPage(m_cms);
792        for (CmsContainerBean container : page.getContainers().values()) {
793            if (m_state.m_excludedContainers.contains(container.getName())) {
794                continue;
795            }
796            for (CmsContainerElementBean element : container.getElements()) {
797                try {
798                    element.initResource(m_cms);
799                    checkFunction(pageResource, element);
800                    Map<String, String> settings = element.getIndividualSettings();
801                    String formatterRef = settings.get(CmsFormatterConfig.FORMATTER_SETTINGS_KEY + container.getName());
802                    if (formatterRef == null) {
803                        for (String key : settings.keySet()) {
804                            if (key.startsWith(CmsFormatterConfig.FORMATTER_SETTINGS_KEY)) {
805                                formatterRef = settings.get(key);
806                            }
807                        }
808                    }
809                    I_CmsFormatterBean formatter = config.findFormatter(formatterRef, /*nowarn=*/true);
810                    CmsUUID formatterId = UNKNOWN_FORMATTER;
811                    if (formatter != null) {
812                        formatterId = new CmsUUID(formatter.getId());
813                        FormatterBean bean = new FormatterBean(
814                            formatterId,
815                            formatter.getLocation(),
816                            formatter.getKey(),
817                            formatter.getNiceName(m_locale));
818                        m_state.m_formatters.putIfAbsent(formatterId, bean);
819                    } else {
820                        m_state.m_formatters.putIfAbsent(
821                            formatterId,
822                            new FormatterBean(UNKNOWN_FORMATTER, "unknown", null, "Unknown formatter"));
823                    }
824                    addEntry(element.getTypeName(), pageResource.getStructureId(), formatterId);
825                } catch (CmsException e) {
826                    LOG.error(e.getLocalizedMessage(), e);
827                }
828            }
829        }
830    }
831}