package com.els.tso.contract.util;

import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.core.FileURIResolver;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts Microsoft Word documents to HTML text.
 *
 * <p>Two formats are supported: OOXML ({@code .docx}, handled by
 * {@link #word2007ToHtml(InputStream)}) and the legacy binary format
 * ({@code .doc}, handled by {@link #word2003ToHtml(InputStream)}). After
 * conversion, every {@code ${...}} placeholder found in the generated markup
 * is cleaned of embedded HTML tags by {@link #doFilter(String)}.
 *
 * <p>The class is exposed as a singleton through {@link #getInstance()}.
 *
 * @author daipj
 */
public class WordToHtml {

    private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);
    private static final WordToHtml INSTANCE = new WordToHtml();

    /**
     * Directory that pictures embedded in the Word document are written to.
     * NOTE(review): this is the empty path, which is resolved against a
     * system-dependent default directory (typically the filesystem root on
     * Unix) -- confirm that this is really the intended location.
     */
    private static final String IMAGE_DIR = "";

    /** Matches a {@code ${...}} placeholder (shortest match, single line). */
    private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\$\\{.*?\\}");
    /** Matches a complete {@code <script>} element. */
    private static final Pattern SCRIPT_PATTERN =
        Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>", Pattern.CASE_INSENSITIVE);
    /** Matches a complete {@code <style>} element. */
    private static final Pattern STYLE_PATTERN =
        Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>", Pattern.CASE_INSENSITIVE);
    /** Matches any single HTML tag. */
    private static final Pattern HTML_TAG_PATTERN =
        Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /** A placeholder with nothing between the braces. */
    private static final String EMPTY_PLACEHOLDER = "${}";
    /** Replacement emitted for {@link #EMPTY_PLACEHOLDER}. */
    private static final String ESCAPED_EMPTY_PLACEHOLDER = "${r'${}'}";

    private WordToHtml() {
    }

    /**
     * Returns the shared converter instance.
     *
     * @return the singleton {@code WordToHtml}
     */
    public static WordToHtml getInstance() {
        return INSTANCE;
    }

    /**
     * Converts a {@code .docx} document read from a stream to XHTML.
     *
     * <p>The stream is always closed before this method returns. Failures
     * while closing resources are logged and do not affect the result.
     *
     * @param inStream stream positioned at the start of a {@code .docx} document
     * @return the generated markup after {@link #doFilter(String)} has been applied
     * @throws RuntimeException wrapping the {@link IOException} if reading or
     *                          converting the document fails
     */
    public String word2007ToHtml(InputStream inStream) {
        XWPFDocument document = null;
        try {
            // Load the document.
            document = new XWPFDocument(inStream);

            // Pictures contained in the document are extracted to IMAGE_DIR.
            File imgPath = new File(IMAGE_DIR);
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imgPath))
                .setIgnoreStylesIfUnused(true);
            options.setExtractor(new FileImageExtractor(imgPath));

            // XWPFDocument -> XHTML (in-memory buffer; closing it is a no-op).
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            XHTMLConverter.getInstance().convert(document, outStream, options);
            String content = new String(outStream.toByteArray(), StandardCharsets.UTF_8);
            return doFilter(content);
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            // Close each resource independently so one failure cannot skip the other.
            closeQuietly(inStream, "input stream");
            closeQuietly(document, "XWPF document");
        }
    }

    /**
     * Converts a {@code .docx} file to XHTML.
     *
     * @param file the {@code .docx} file to convert
     * @return the generated markup, or {@code null} if the file could not be
     *         opened or closed (the error is logged)
     * @throws RuntimeException if the conversion itself fails, see
     *                          {@link #word2007ToHtml(InputStream)}
     */
    public String word2007ToHtml(File file) {
        try (InputStream inStream = new FileInputStream(file)) {
            return word2007ToHtml(inStream);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            logger.error("Unable to read Word 2007 file: {}", file, e);
            return null;
        }
    }

    /**
     * Converts a legacy {@code .doc} file to HTML.
     *
     * @param file the {@code .doc} file to convert
     * @return the generated markup, or {@code null} if the file could not be
     *         opened or closed (the error is logged)
     * @throws RuntimeException if the conversion itself fails, see
     *                          {@link #word2003ToHtml(InputStream)}
     */
    public String word2003ToHtml(File file) {
        try (InputStream inStream = new FileInputStream(file)) {
            return word2003ToHtml(inStream);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            logger.error("Unable to read Word 2003 file: {}", file, e);
            return null;
        }
    }

    /**
     * Converts a legacy {@code .doc} document read from a stream to HTML.
     *
     * <p>Pictures found in the document are written to {@link #IMAGE_DIR}.
     * The stream is always closed before this method returns. Failures while
     * closing resources are logged and do not affect the result.
     *
     * @param istream stream positioned at the start of a {@code .doc} document
     * @return the generated markup after {@link #doFilter(String)} has been applied
     * @throws RuntimeException wrapping any exception raised while reading,
     *                          converting or serializing the document
     */
    public String word2003ToHtml(InputStream istream) {
        HWPFDocument document = null;
        try {
            document = new HWPFDocument(istream);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                                          float widthInches, float heightInches) {
                    // The picture reference in the markup is just the suggested file name.
                    return suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(document);

            savePictures(document);

            return doFilter(serializeToHtml(wordToHtmlConverter.getDocument()));
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Close each resource independently so one failure cannot skip the other.
            closeQuietly(istream, "input stream");
            closeQuietly(document, "HWPF document");
        }
    }

    /**
     * Writes every picture of the document to {@link #IMAGE_DIR}, creating the
     * directory first when it does not exist.
     */
    private void savePictures(HWPFDocument document) throws IOException {
        List<Picture> pictures = document.getPicturesTable().getAllPictures();
        if (pictures.isEmpty()) {
            return;
        }
        File imgPathFile = new File(IMAGE_DIR);
        if (!imgPathFile.exists()) {
            imgPathFile.mkdirs();
        }
        for (Picture pic : pictures) {
            String target = IMAGE_DIR + File.separator + pic.suggestFullFileName();
            // try-with-resources: the stream must be closed for every picture.
            try (OutputStream imageOut = new FileOutputStream(target)) {
                pic.writeImageContent(imageOut);
            }
        }
    }

    /** Serializes the DOM produced by the converter to an indented HTML string. */
    private String serializeToHtml(Document htmlDocument) throws Exception {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outputStream));

        // Decode with the same charset the serializer was told to encode with,
        // instead of relying on the platform default.
        return new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
    }

    /** Closes a resource, logging (instead of propagating) any failure. */
    private static void closeQuietly(AutoCloseable resource, String description) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            logger.error("Failed to close {}", description, e);
        }
    }

    /**
     * Cleans every {@code ${...}} placeholder in the given markup.
     *
     * <p>HTML tags found inside a placeholder are removed with
     * {@link #delhtmltag(String)}. A placeholder that ends up as {@code ${}}
     * is replaced by {@code ${r'${}'}}. Text outside placeholders is left
     * untouched. Each placeholder is rewritten exactly once, in a single pass,
     * so a replacement is never processed again.
     *
     * @param content the markup to filter
     * @return the markup with all placeholders cleaned
     */
    public String doFilter(String content) {
        Matcher matcher = PLACEHOLDER_PATTERN.matcher(content);
        StringBuffer filtered = new StringBuffer(content.length());
        while (matcher.find()) {
            String cleaned = delhtmltag(matcher.group());
            if (EMPTY_PLACEHOLDER.equals(cleaned)) {
                cleaned = ESCAPED_EMPTY_PLACEHOLDER;
            }
            // quoteReplacement: '$' and '\' in the placeholder must be taken literally.
            matcher.appendReplacement(filtered, Matcher.quoteReplacement(cleaned));
        }
        matcher.appendTail(filtered);
        return filtered.toString();
    }

    /**
     * Strips markup from an HTML fragment.
     *
     * <p>Complete {@code <script>} and {@code <style>} elements are removed
     * together with their content, then all remaining tags are removed, and
     * finally the result is trimmed.
     *
     * @param htmlStr the HTML fragment
     * @return the remaining plain text, trimmed
     */
    public String delhtmltag(String htmlStr) {
        String withoutScripts = SCRIPT_PATTERN.matcher(htmlStr).replaceAll(""); // drop script elements
        String withoutStyles = STYLE_PATTERN.matcher(withoutScripts).replaceAll(""); // drop style elements
        String withoutTags = HTML_TAG_PATTERN.matcher(withoutStyles).replaceAll(""); // drop remaining tags
        return withoutTags.trim();
    }

}

