package com.day.cq.dam.core.impl.process;

import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.Rendition;
import com.day.cq.dam.commons.process.AbstractAssetWorkflowProcess;
import com.day.cq.dam.commons.util.AssetUpdate;
import com.day.cq.dam.commons.util.AssetUpdateMonitor;
import com.day.cq.workflow.WorkflowException;
import com.day.cq.workflow.WorkflowSession;
import com.day.cq.workflow.exec.WorkItem;
import com.day.cq.workflow.metadata.MetaDataMap;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashSet;
import java.util.Set;
import javax.jcr.Binary;
import javax.jcr.ItemExistsException;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.sling.commons.osgi.PropertiesUtil;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.WriteOutContentHandler;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * Workflow process that extracts the plain text of an asset's original rendition with
 * Apache Tika and stores it as an {@code nt:file} named {@code text} below the asset's
 * {@code jcr:content} node.
 *
 * <p>Extraction only runs for assets whose original rendition has one of the configured
 * mime types. The amount of extracted text can be capped through the
 * {@code maxExtract} setting; a negative value disables the cap.</p>
 */
@Service
@Component(metatype = true, label = "Adobe CQ DAM Text Extraction", description = "Extracts text from Assets for mime types configured. Text will be available as '<assetpath>/jcr:content/text' resource and can be used for indexing. This eases load for the oak indexing engine.")
public class TextExtractionProcess extends AbstractAssetWorkflowProcess {

    @Property(boolValue = {true}, label = "Activated", description = "Apply the text extraction as configured.")
    private static final String CONF_APPLY = "apply";

    @Property(value = {"application/pdf"}, label = "Mime types", description = "Mime types  whose text gets extracted into a separate resource.", cardinality = Integer.MAX_VALUE)
    private static final String CONF_MIME_TYPES = "mimeTypes";

    @Property(longValue = {100000}, label = "Max Extracted Length", description = "Extract text up to a maximum number of characters. Use negative value for unlimited.")
    private static final String CONF_MAX_EXTRACT = "maxExtract";

    /** Mime types used when the {@code mimeTypes} property is not set at all. */
    private static final String[] MIME_TYPES_DEF = {"application/pdf"};

    /** Character limit used when the {@code maxExtract} property is not set. */
    private static final long MAX_EXTRACT_DEF = 100000L;

    /** Logger named after the runtime class, as the per-call logger was before. */
    private final Logger logger = LoggerFactory.getLogger(getClass());

    @Reference
    private AssetUpdateMonitor monitor;

    /** Mime types for which text is extracted; empty means the process is inactive. */
    private final Set<String> mimeTypes = new HashSet<String>();

    /** Maximum number of characters to extract; negative means unlimited. */
    private long maxExtractedLength = MAX_EXTRACT_DEF;

    /**
     * SAX handler that ignores all markup events and copies character data (including
     * ignorable whitespace) to a {@link Writer}. It never flushes or closes the writer;
     * that is the responsibility of whoever created it.
     */
    private static final class PlainTextHandler implements ContentHandler {
        private final Writer out;

        PlainTextHandler(Writer writer) {
            this.out = writer;
        }

        @Override
        public void setDocumentLocator(Locator locator) {
        }

        @Override
        public void startDocument() throws SAXException {
        }

        @Override
        public void endDocument() throws SAXException {
        }

        @Override
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
        }

        @Override
        public void endPrefixMapping(String prefix) throws SAXException {
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
        }

        @Override
        public void characters(char[] chars, int start, int length) throws SAXException {
            write(chars, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] chars, int start, int length) throws SAXException {
            write(chars, start, length);
        }

        @Override
        public void processingInstruction(String target, String data) throws SAXException {
        }

        @Override
        public void skippedEntity(String name) throws SAXException {
        }

        /** Writes the given characters, wrapping any IO failure in a {@link SAXException}. */
        private void write(char[] chars, int start, int length) throws SAXException {
            try {
                this.out.write(chars, start, length);
            } catch (IOException e) {
                throw new SAXException("output characters", e);
            }
        }
    }

    /**
     * Runs the text extraction for the asset referenced by the work item.
     *
     * <p>The asset is only processed when its original rendition has a configured mime
     * type. Tika, SAX and repository failures are logged and swallowed; IO failures are
     * rethrown as {@link WorkflowException}.</p>
     *
     * @param workItem        the work item whose payload identifies the asset
     * @param workflowSession the workflow session providing the JCR session
     * @param metaDataMap     process arguments (not used by this process)
     * @throws WorkflowException if the payload asset does not exist or an IO error occurs
     */
    public void execute(final WorkItem workItem, WorkflowSession workflowSession, MetaDataMap metaDataMap) throws WorkflowException {
        this.monitor.startUpdate(workItem, getResourceResolver(workflowSession.getSession()), this).checkAndRun(new AssetUpdate.AssetCheck() {
            public boolean isAcceptable(Asset asset) throws WorkflowException {
                Set<String> configured = TextExtractionProcess.this.mimeTypes;
                if (configured.isEmpty()) {
                    TextExtractionProcess.this.logger.debug("inactive: mimeTypes={}", configured);
                    return false;
                }
                Rendition original = asset.getOriginal();
                if (original == null) {
                    TextExtractionProcess.this.logger.debug("no original rendition available for {}", asset.getPath());
                    return false;
                }
                MediaType mediaType = MediaType.parse(original.getMimeType());
                if (mediaType == null) {
                    return false;
                }
                // Match either the bare "type/subtype" or the full media type string.
                String baseType = mediaType.getType() + "/" + mediaType.getSubtype();
                return configured.contains(baseType) || configured.contains(mediaType.toString());
            }

            public boolean isNullAcceptable() throws WorkflowException {
                throw new WorkflowException("execute: cannot extract text, asset [{" + workItem.getWorkflowData().getPayload().toString() + "}] in payload doesn't exist for workflow [{" + workItem.getId() + "}].");
            }
        }, new AssetUpdate.Runner() {
            public void run(Asset asset, AssetUpdate assetUpdate) throws WorkflowException, Exception {
                try {
                    TextExtractionProcess.this.extractText(asset);
                } catch (RepositoryException e) {
                    TextExtractionProcess.this.logger.error("Creating jcr:content/text node", e);
                } catch (TikaException e) {
                    TextExtractionProcess.this.logger.error("Apache Tika error on text extraction", e);
                } catch (SAXException e) {
                    TextExtractionProcess.this.logger.error("SAX error on text extraction", e);
                } catch (IOException e) {
                    throw new WorkflowException("IO error on text extraction", e);
                }
            }
        });
    }

    /**
     * Extracts the text of the asset's original rendition into a temporary file and
     * stores that file below the asset. The temporary file is always deleted, whether
     * extraction succeeds or fails.
     */
    private void extractText(Asset asset) throws IOException, SAXException, TikaException, RepositoryException {
        Rendition original = asset.getOriginal();
        MediaType mediaType = MediaType.parse(original.getMimeType());
        String contentType = null;
        String charset = null;
        if (mediaType != null) {
            contentType = mediaType.getType() + "/" + mediaType.getSubtype();
            charset = mediaType.getParameters().get("charset");
        }
        Metadata metadata = new Metadata();
        metadata.set("Content-Type", contentType);
        if (charset != null) {
            metadata.set("Content-Encoding", charset);
        }

        File tempFile = File.createTempFile("cq.dam.tx", ".txt");
        try {
            logger.debug("extracting text node from {}", asset.getPath());
            writeExtractedText(original, metadata, tempFile);
            storeText(asset, tempFile);
        } finally {
            FileUtils.deleteQuietly(tempFile);
        }
    }

    /**
     * Parses the rendition with Tika's default parser and writes the character data as
     * UTF-8 to {@code target}. Hitting the configured write limit is not an error: the
     * text written up to that point is kept.
     */
    private void writeExtractedText(Rendition original, Metadata metadata, File target) throws IOException, SAXException, TikaException {
        Parser parser = TikaConfig.getDefaultConfig().getParser();
        OutputStream fileOut = new FileOutputStream(target);
        try {
            Writer writer = new OutputStreamWriter(new BufferedOutputStream(fileOut), "utf-8");
            ContentHandler handler = new PlainTextHandler(writer);
            if (this.maxExtractedLength >= 0) {
                // Clamp instead of casting blindly: a plain (int) cast of a large long can
                // wrap to a negative limit (or to -1, which Tika treats as unlimited).
                int limit = (int) Math.min(this.maxExtractedLength, (long) Integer.MAX_VALUE);
                handler = new WriteOutContentHandler(handler, limit);
            }
            InputStream renditionStream = null;
            try {
                renditionStream = original.getStream();
                parser.parse(renditionStream, handler, metadata, new ParseContext());
            } catch (SAXException e) {
                boolean limitReached = handler instanceof WriteOutContentHandler
                        && ((WriteOutContentHandler) handler).isWriteLimitReached(e);
                if (!limitReached) {
                    throw e;
                }
            } finally {
                IOUtils.closeQuietly(renditionStream);
            }
            // Close the writer itself (not just the underlying stream) so the bytes still
            // held in its encoder buffer reach the file; a failure here must not be hidden.
            writer.close();
        } finally {
            // Releases the file handle on every failure path; a second close is harmless.
            IOUtils.closeQuietly(fileOut);
        }
    }

    /**
     * Stores the content of {@code textFile} as {@code jcr:content/text} of the asset.
     * Logs a warning and does nothing when the asset has no {@code jcr:content} node.
     */
    private void storeText(Asset asset, File textFile) throws IOException, RepositoryException {
        Node assetNode = asset.adaptTo(Node.class);
        if (assetNode == null || !assetNode.hasNode("jcr:content")) {
            logger.warn("jcr:content subnode no longer available for {}", asset.getPath());
            return;
        }
        Node contentNode = assetNode.getNode("jcr:content");
        InputStream textStream = new FileInputStream(textFile);
        try {
            putFile(contentNode, "text", "text/plain", "utf-8", textStream);
            logger.debug("created/updated text node for {}", asset.getPath());
        } finally {
            IOUtils.closeQuietly(textStream);
        }
    }

    /**
     * Reads the component configuration: whether extraction is applied, the mime types
     * to extract and the maximum number of characters to extract.
     */
    @Activate
    private void activate(ComponentContext componentContext) throws RepositoryException {
        boolean apply = PropertiesUtil.toBoolean(componentContext.getProperties().get(CONF_APPLY), true);
        this.mimeTypes.clear();
        if (apply) {
            // Fall back to the documented default when the property is absent.
            String[] configured = PropertiesUtil.toStringArray(componentContext.getProperties().get(CONF_MIME_TYPES), MIME_TYPES_DEF);
            this.mimeTypes.addAll(Arrays.asList(configured));
        }
        this.maxExtractedLength = PropertiesUtil.toLong(componentContext.getProperties().get(CONF_MAX_EXTRACT), MAX_EXTRACT_DEF);
    }

    /**
     * Creates or updates an {@code nt:file} child of {@code node} holding the given data.
     * The content node uses {@code oak:Resource} when that node type is registered and
     * {@code nt:resource} otherwise. The session is not saved by this method.
     *
     * @param node        parent node
     * @param name        name of the file node
     * @param mimeType    value for {@code jcr:mimeType}
     * @param encoding    value for {@code jcr:encoding}
     * @param inputStream data for {@code jcr:data}; not closed by this method
     * @return the file node
     * @throws RepositoryException if the repository operation fails
     */
    public Node putFile(Node node, String name, String mimeType, String encoding, InputStream inputStream) throws RepositoryException {
        Session session = node.getSession();
        Binary binary = session.getValueFactory().createBinary(inputStream);
        try {
            String resourceType = session.getWorkspace().getNodeTypeManager().hasNodeType("oak:Resource")
                    ? "oak:Resource"
                    : "nt:resource";
            Node fileNode = getOrAddNode(node, name, "nt:file");
            Node resourceNode = getOrAddNode(fileNode, "jcr:content", resourceType);
            resourceNode.setProperty("jcr:mimeType", mimeType);
            resourceNode.setProperty("jcr:encoding", encoding);
            resourceNode.setProperty("jcr:lastModified", Calendar.getInstance());
            resourceNode.setProperty("jcr:data", binary);
            return fileNode;
        } finally {
            binary.dispose();
        }
    }

    /**
     * Adds a child node of the given type, or returns the existing child when the
     * repository reports that an item of that name already exists.
     */
    private Node getOrAddNode(Node parent, String name, String nodeType) throws RepositoryException {
        try {
            return parent.addNode(name, nodeType);
        } catch (ItemExistsException e) {
            // Refresh (keeping pending changes) before reading the existing child.
            // getNode throws PathNotFoundException rather than returning null.
            parent.getSession().refresh(true);
            return parent.getNode(name);
        }
    }

    protected void bindMonitor(AssetUpdateMonitor assetUpdateMonitor) {
        this.monitor = assetUpdateMonitor;
    }

    protected void unbindMonitor(AssetUpdateMonitor assetUpdateMonitor) {
        if (this.monitor == assetUpdateMonitor) {
            this.monitor = null;
        }
    }
}
