/*
 * Decompiled with CFR 0.152.
 */
package com.github.vector4wang.thread;

import com.github.vector4wang.VWCrawler;
import com.github.vector4wang.annotation.CssSelector;
import com.github.vector4wang.model.PageRequest;
import com.github.vector4wang.util.CrawlerUtil;
import com.github.vector4wang.util.GenericsUtils;
import com.github.vector4wang.util.ReflectUtils;
import com.github.vector4wang.util.SelectType;
import java.io.IOException;
import java.lang.reflect.Field;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.util.Collection;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Worker loop of the crawler: repeatedly asks the owning {@link VWCrawler} for the
 * next URL, downloads it, enqueues matching links and, for target URLs, maps the
 * page onto a value object that is handed to the crawler service.
 *
 * <p>The {@code running} flag is {@code false} while the worker is waiting for a
 * URL and {@code true} while it is handling one. It is always reset to
 * {@code false} when {@link #run()} returns.
 */
public class CrawlerThread implements Runnable {
    private static final Logger LOGGER = LoggerFactory.getLogger(CrawlerThread.class);

    private final VWCrawler vwCrawler;

    // volatile: written by the worker thread and exposed through isRunning()/setRunning(),
    // presumably so that other threads can poll it -- confirm against VWCrawler.tryStop().
    private volatile boolean isRunning;

    /**
     * @param vwCrawler crawler that supplies URLs, configuration and the crawler service
     */
    public CrawlerThread(VWCrawler vwCrawler) {
        this.vwCrawler = vwCrawler;
    }

    /**
     * @return {@code true} while this worker is handling a URL
     */
    public boolean isRunning() {
        return this.isRunning;
    }

    /**
     * @param running new value of the running flag
     */
    public void setRunning(boolean running) {
        this.isRunning = running;
    }

    /**
     * Processes URLs until the crawler returns an empty URL or an exception escapes
     * the loop. An {@link InterruptedException} is treated as a normal stop request:
     * the interrupt status is restored and no error is logged.
     */
    @Override
    public void run() {
        try {
            while (true) {
                // Mark the worker idle while it asks for (and possibly waits on) the next URL.
                this.isRunning = false;
                this.vwCrawler.tryStop();
                String url = this.vwCrawler.generateUrl();
                this.isRunning = true;
                if (StringUtils.isEmpty(url)) {
                    LOGGER.info("no url");
                    break;
                }
                this.process(url);
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Restore the flag so code further up the stack can still observe the interrupt.
                Thread.currentThread().interrupt();
                LOGGER.info("vw-crawler[{}] stopped!", Thread.currentThread().getName());
            } else {
                LOGGER.error(e.getMessage(), e);
            }
        } finally {
            // A finished worker must never be reported as busy.
            this.isRunning = false;
        }
    }

    /**
     * Handles one URL: skips it if the crawler service reports it as existing,
     * downloads it, enqueues links matching the configured patterns and, if the URL
     * is a target URL, builds the page object and passes it to the crawler service.
     * Failures are logged and do not propagate to the caller.
     *
     * @param url absolute URL to process
     */
    private void process(String url) {
        if (this.vwCrawler.getCrawlerService().isExist(url)) {
            return;
        }
        LOGGER.info("{}\u5f00\u59cb\u6293\u53d6[{}]\u5f53\u524d\u5f85\u6293\u53d6\u6570\u4e3a{},\u5df2\u6293\u53d6\u6570\u4e3a{}", new Object[]{Thread.currentThread().getName(), url, this.vwCrawler.getWaitCrawlerUrls().size(), this.vwCrawler.getCrawledUrls().size()});
        try {
            Document document = this.fetchDocument(url);
            if (document == null) {
                // Every attempt failed; there is nothing to hand to the service callbacks.
                return;
            }
            if (!this.vwCrawler.getCrawlerService().isContinue(document)) {
                return;
            }
            this.enqueueMatchingLinks(document);
            if (!this.vwCrawler.isTargetUrl(url)) {
                return;
            }
            Class<?> pageType = GenericsUtils.getSuperClassGenericType(this.vwCrawler.getCrawlerService().getClass());
            Object pageVo = pageType.getDeclaredConstructor().newInstance();
            this.populateSelectorFields(pageVo, document);
            this.vwCrawler.getCrawlerService().parsePage(document, pageVo);
            this.vwCrawler.getCrawlerService().save(pageVo);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Do not lose the stop request; the run loop sees it on its next blocking call.
                Thread.currentThread().interrupt();
            }
            if (e instanceof IOException) {
                LOGGER.warn("\u8bf7\u6c42\u5730\u5740\u53d1\u751f\u9519\u8bef");
            }
            LOGGER.error(e.getMessage(), e);
        }
    }

    /**
     * Downloads {@code url}, trying again after a failed attempt until the crawler's
     * retry count is reached. At least one attempt is always made and every failed
     * attempt counts exactly once.
     *
     * @param url absolute URL to download
     * @return the downloaded document, or {@code null} if no attempt produced one
     * @throws Exception if building the request fails or the download is interrupted;
     *     declared broadly because the collaborators' signatures are not visible here
     */
    private Document fetchDocument(String url) throws Exception {
        int attempts = 0;
        do {
            // A fresh request per attempt, so a different proxy may be picked on retry.
            PageRequest pageRequest = this.buildRequest(url);
            try {
                Document document = this.vwCrawler.getDownloader().downloadPage(pageRequest);
                if (document != null) {
                    return document;
                }
            } catch (ConnectException e) {
                LOGGER.warn("\u94fe\u63a5\u8d85\u65f6");
            } catch (SocketTimeoutException e) {
                LOGGER.warn("\u94fe\u63a5\u8d85\u65f6");
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // Retrying after an interrupt would ignore the stop request.
                    throw e;
                }
                LOGGER.error(e.getMessage(), e);
            }
            ++attempts;
        } while (attempts < this.vwCrawler.getRetryCount());
        return null;
    }

    /**
     * Builds the request for {@code url} from the crawler's timeout, headers and,
     * when proxies are available, a proxy taken from the proxy extractor.
     *
     * @param url absolute URL to request
     * @return the configured request
     * @throws Exception if a collaborator fails; declared broadly because the
     *     collaborators' signatures are not visible here
     */
    private PageRequest buildRequest(String url) throws Exception {
        PageRequest pageRequest = new PageRequest();
        pageRequest.setUrl(url);
        pageRequest.setTimeout(this.vwCrawler.getTimeout());
        if (this.vwCrawler.getHeaderMap() != null && !this.vwCrawler.getHeaderMap().isEmpty()) {
            pageRequest.setHeader(this.vwCrawler.getHeaderMap());
        }
        if (this.vwCrawler.getProxyExtractor() != null) {
            // NOTE(review): assumes getProxy2s() returns a java.util.Collection -- confirm.
            Collection<?> proxies = this.vwCrawler.getProxyExtractor().getProxy2s();
            if (proxies != null && !proxies.isEmpty()) {
                pageRequest.setProxy(this.vwCrawler.getProxyExtractor().extractProxyIp());
            }
        }
        return pageRequest;
    }

    /**
     * Adds every link of {@code document} that matches a seed-page pattern, and then
     * every link that matches a target pattern, to the crawler's wait queue.
     *
     * @param document downloaded page whose {@code a[href]} links are inspected
     * @throws Exception if a collaborator fails; declared broadly because the
     *     collaborators' signatures are not visible here
     */
    private void enqueueMatchingLinks(Document document) throws Exception {
        Elements links = document.select("a[href]");
        for (Element link : links) {
            String href = link.absUrl("href");
            if (StringUtils.isEmpty(href)) {
                // An unresolvable link yields ""; the run loop treats an empty URL as "no url" and stops.
                continue;
            }
            for (String seedsPageUrlRex : this.vwCrawler.getSeedsPageUrlRex()) {
                if (CrawlerUtil.isMatch(seedsPageUrlRex, href)) {
                    this.vwCrawler.addWaitCrawlerUrl(href);
                }
            }
        }
        for (Element link : links) {
            String href = link.absUrl("href");
            if (StringUtils.isEmpty(href)) {
                continue;
            }
            for (String targetUrlRex : this.vwCrawler.getTargetUrlRex()) {
                if (CrawlerUtil.isMatch(targetUrlRex, href)) {
                    this.vwCrawler.addWaitCrawlerUrl(href);
                }
            }
        }
    }

    /**
     * Fills every field of {@code pageVo} that carries a {@link CssSelector}
     * annotation with a non-empty selector. The selected elements are rendered as
     * HTML when the annotation's result type is {@link SelectType#HTML} and as text
     * otherwise, then converted by {@link ReflectUtils#parseValueWithType}.
     *
     * @param pageVo   object whose declared fields are populated
     * @param document page the selectors are evaluated against
     * @throws Exception if conversion or the reflective write fails
     */
    private void populateSelectorFields(Object pageVo, Document document) throws Exception {
        for (Field field : pageVo.getClass().getDeclaredFields()) {
            CssSelector annotation = field.getAnnotation(CssSelector.class);
            if (annotation == null) {
                continue;
            }
            String selector = annotation.selector();
            if (selector == null || selector.length() == 0) {
                continue;
            }
            Elements selected = document.select(selector);
            String result = annotation.resultType() == SelectType.HTML ? selected.toString() : selected.text();
            field.setAccessible(true);
            field.set(pageVo, ReflectUtils.parseValueWithType(result, field));
        }
    }
}

