多测师是一家拥有先进的教学理念,强大的师资团队,业内好评甚多的接口自动化测试培训机构!

17727591462

联系电话

您现在所在位置:接口自动化测试培训 > 新闻资讯

爬虫框架WebMagic源码分析之Selenium-自动化测试

更新时间:2022-03-25 09:41:15 作者:多测师 浏览:347

  WebMagic有一个selenium模块,其中实现了一个SeleniumDownloader,但它的灵活性不够,所以我参考它自己实现了一个。

爬虫框架WebMagic源码分析之Selenium-自动化测试

  首先是WebDriverPool用来管理WebDriver池:

  import java.util.ArrayList;

  import java.util.concurrent.BlockingDeque;

  import java.util.concurrent.LinkedBlockingDeque;

  import java.util.concurrent.TimeUnit;

  import java.util.concurrent.atomic.AtomicInteger;

  import org.openqa.selenium.WebDriver;

  import org.openqa.selenium.phantomjs.PhantomJSDriver;

  import org.openqa.selenium.phantomjs.PhantomJSDriverService;

  import org.openqa.selenium.remote.DesiredCapabilities;

  import org.slf4j.Logger;

  import org.slf4j.LoggerFactory;

  import net.xby1993.common.util.FileUtil;

  /**
   * A bounded pool of PhantomJS-backed {@link WebDriver} instances.
   *
   * <p>Drivers are created lazily, one at a time, until the configured capacity is
   * reached. After that, callers of {@link #get()} block until another caller hands
   * a driver back through {@link #returnToPool(WebDriver)}.
   *
   * <p>The PhantomJS capabilities (executable path, user agent, command-line flags)
   * are built once in the static initializer and shared by every driver.
   *
   * @author taojw
   */
  public class WebDriverPool {

      /** Pool size used by the no-argument constructor. */
      private static final int DEFAULT_CAPACITY = 5;

      /** Page-load timeout applied to every driver the pool creates. */
      private static final long PAGE_LOAD_TIMEOUT_SECONDS = 60;

      // NOTE(review): not referenced anywhere in this class; kept in case it is
      // meant for a future driver-type switch.
      private static final String DRIVER_PHANTOMJS = "phantomjs";

      private static String PHANTOMJS_PATH;

      private static DesiredCapabilities caps = DesiredCapabilities.phantomjs();

      static {
          PHANTOMJS_PATH = FileUtil.getCommonProp("phantomjs.path");
          caps.setJavascriptEnabled(true);
          caps.setCapability(
                  PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY,
                  PHANTOMJS_PATH);
          caps.setCapability("takesScreenshot", false);
          caps.setCapability(
                  PhantomJSDriverService.PHANTOMJS_PAGE_CUSTOMHEADERS_PREFIX
                          + "User-Agent",
                  "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36");

          // PhantomJS command-line flags, see http://phantomjs.org/api/command-line.html
          ArrayList<String> cliArgsCap = new ArrayList<String>();
          cliArgsCap.add("--web-security=false");
          cliArgsCap.add("--ssl-protocol=any");
          cliArgsCap.add("--ignore-ssl-errors=true");
          cliArgsCap.add("--load-images=false"); // do not load images
          caps.setCapability(PhantomJSDriverService.PHANTOMJS_CLI_ARGS,
                  cliArgsCap);
          caps.setCapability(
                  PhantomJSDriverService.PHANTOMJS_GHOSTDRIVER_CLI_ARGS,
                  new String[] {"--logLevel=INFO"});
      }

      private final Logger logger = LoggerFactory.getLogger(getClass());

      /** Maximum number of drivers that may be alive at the same time. */
      private final int capacity;

      /** Number of live drivers: idle ones in the queue plus borrowed ones. */
      private final AtomicInteger refCount = new AtomicInteger(0);

      /** Idle drivers that are ready to be handed out. */
      private final BlockingDeque<WebDriver> innerQueue;

      /** Creates a pool holding at most {@value #DEFAULT_CAPACITY} drivers. */
      public WebDriverPool() {
          this(DEFAULT_CAPACITY);
      }

      /**
       * Creates a pool holding at most {@code poolsize} drivers.
       *
       * @param poolsize maximum number of live drivers
       * @throws IllegalArgumentException if {@code poolsize} is less than 1
       *         (raised by {@link LinkedBlockingDeque})
       */
      public WebDriverPool(int poolsize) {
          this.capacity = poolsize;
          this.innerQueue = new LinkedBlockingDeque<WebDriver>(poolsize);
      }

      /**
       * Borrows a driver from the pool.
       *
       * <p>An idle driver is reused when one is available. Otherwise, if the pool
       * is below capacity, a new driver is created and returned directly to the
       * caller. It is never placed in the shared queue first, so no other thread
       * can take it away from the thread that paid for its creation. When the pool
       * is at capacity the call blocks until a driver is returned.
       *
       * @return a driver that must later be passed to
       *         {@link #returnToPool(WebDriver)} or {@link #close(WebDriver)}
       * @throws InterruptedException if interrupted while waiting for a driver
       */
      public WebDriver get() throws InterruptedException {
          WebDriver idle = innerQueue.poll();
          if (idle != null) {
              return idle;
          }
          if (reserveSlot()) {
              boolean created = false;
              try {
                  WebDriver fresh = createDriver();
                  created = true;
                  return fresh;
              } finally {
                  if (!created) {
                      // Give the slot back so a failed start does not shrink the pool.
                      refCount.decrementAndGet();
                  }
              }
          }
          return innerQueue.take();
      }

      /**
       * Hands a borrowed driver back so that it can be reused.
       *
       * @param webDriver a driver previously obtained from {@link #get()}
       * @throws IllegalStateException if the idle queue is already full
       */
      public void returnToPool(WebDriver webDriver) {
          innerQueue.add(webDriver);
      }

      /**
       * Quits a driver and releases its slot so that a replacement can be created.
       *
       * @param webDriver the driver to terminate
       */
      public void close(WebDriver webDriver) {
          refCount.decrementAndGet();
          webDriver.quit();
      }

      /**
       * Quits every idle driver in the pool. Drivers that are currently borrowed
       * are not affected. A failure to quit one driver is logged and does not stop
       * the remaining drivers from being closed.
       */
      public void shutdown() {
          WebDriver driver;
          while ((driver = innerQueue.poll()) != null) {
              try {
                  close(driver);
              } catch (Exception e) {
                  logger.warn("webdriverpool关闭失败", e);
              }
          }
      }

      /** Atomically claims one of the remaining slots; returns false when the pool is full. */
      private boolean reserveSlot() {
          while (true) {
              int current = refCount.get();
              if (current >= capacity) {
                  return false;
              }
              if (refCount.compareAndSet(current, current + 1)) {
                  return true;
              }
          }
      }

      /** Starts a new PhantomJS driver configured with the shared capabilities. */
      private WebDriver createDriver() {
          WebDriver driver = new PhantomJSDriver(caps);
          // Tentative workaround for https://github.com/ariya/phantomjs/issues/11526
          driver.manage().timeouts()
                  .pageLoadTimeout(PAGE_LOAD_TIMEOUT_SECONDS, TimeUnit.SECONDS);
          return driver;
      }
  }

  之后便是SeleniumDownloader:

  import org.openqa.selenium.By;

  import org.openqa.selenium.Cookie;

  import org.openqa.selenium.WebDriver;

  import org.openqa.selenium.WebElement;

  import org.slf4j.Logger;

  import org.slf4j.LoggerFactory;

  import us.codecraft.webmagic.Page;

  import us.codecraft.webmagic.Request;

  import us.codecraft.webmagic.Site;

  import us.codecraft.webmagic.Task;

  import us.codecraft.webmagic.downloader.Downloader;

  import us.codecraft.webmagic.selector.Html;

  import us.codecraft.webmagic.selector.PlainText;

  import us.codecraft.webmagic.utils.UrlUtils;

  import java.util.Map;

  /**
   * A WebMagic {@link Downloader} that renders pages through a pooled Selenium
   * {@link WebDriver}.
   *
   * <p>After loading a page the downloader waits {@code sleepTime} milliseconds,
   * adds the site's cookies, runs the optional downloader-wide
   * {@link SeleniumAction} and then the optional per-request action stored under
   * the request extra {@code "action"}. Finally it captures the resulting DOM.
   *
   * @author taojw
   */
  public class SeleniumDownloader implements Downloader {

      private static final Logger log = LoggerFactory.getLogger(SeleniumDownloader.class);

      /** Time to wait after the page load, in milliseconds (3 s by default). */
      private int sleepTime = 3000;

      /** Action run for every request; may be {@code null}. */
      private SeleniumAction action = null;

      private WebDriverPool webDriverPool = new WebDriverPool();

      /** Creates a downloader with the default wait time, a default pool and no action. */
      public SeleniumDownloader() {
      }

      /**
       * @param sleepTime milliseconds to wait after each page load
       * @param pool      driver pool to use; a default pool is used when {@code null}
       */
      public SeleniumDownloader(int sleepTime, WebDriverPool pool) {
          this(sleepTime, pool, null);
      }

      /**
       * @param sleepTime milliseconds to wait after each page load
       * @param pool      driver pool to use; a default pool is used when {@code null}
       * @param action    action run for every request; may be {@code null}
       */
      public SeleniumDownloader(int sleepTime, WebDriverPool pool, SeleniumAction action) {
          this.sleepTime = sleepTime;
          this.action = action;
          if (pool != null) {
              webDriverPool = pool;
          }
      }

      /**
       * Sets the wait time applied after each page load.
       *
       * @param sleepTime wait time in milliseconds
       * @return this downloader, for chaining
       */
      public SeleniumDownloader setSleepTime(int sleepTime) {
          this.sleepTime = sleepTime;
          return this;
      }

      /**
       * Sets the action that is run for every request.
       *
       * @param action the action, or {@code null} to disable it
       */
      public void setOperator(SeleniumAction action) {
          this.action = action;
      }

      /**
       * Loads {@code request}'s URL in a pooled driver and returns the rendered page.
       *
       * <p>The driver is always given back to the pool (or closed, if the page load
       * itself fails), even when a cookie, action or DOM lookup throws.
       *
       * @param request the request to download
       * @param task    the task supplying the {@link Site} configuration
       * @return the rendered page; a page marked as skipped when loading fails; or
       *         {@code null} when the thread is interrupted while waiting for a driver
       */
      @Override
      public Page download(Request request, Task task) {
          WebDriver webDriver;
          try {
              webDriver = webDriverPool.get();
          } catch (InterruptedException e) {
              // Restore the flag so the caller can see that it was interrupted.
              Thread.currentThread().interrupt();
              log.warn("interrupted", e);
              return null;
          }
          log.info("downloading page " + request.getUrl());
          Page page = new Page();
          boolean interrupted = false;
          try {
              webDriver.get(request.getUrl());
              Thread.sleep(sleepTime);
          } catch (InterruptedException e) {
              // The page has already loaded, so carry on with it. The interrupt
              // flag is restored once the driver has been handed back.
              interrupted = true;
              log.warn("interrupted while waiting after page load", e);
          } catch (Exception e) {
              log.warn("failed to load page " + request.getUrl(), e);
              webDriverPool.close(webDriver);
              page.setSkip(true);
              return page;
          }
          try {
              WebDriver.Options manage = webDriver.manage();
              Site site = task.getSite();
              if (site.getCookies() != null) {
                  for (Map.Entry<String, String> cookieEntry : site.getCookies()
                          .entrySet()) {
                      Cookie cookie = new Cookie(cookieEntry.getKey(),
                              cookieEntry.getValue());
                      manage.addCookie(cookie);
                  }
              }
              manage.window().maximize();
              if (action != null) {
                  action.execute(webDriver);
              }
              SeleniumAction reqAction = (SeleniumAction) request.getExtra("action");
              if (reqAction != null) {
                  reqAction.execute(webDriver);
              }
              WebElement webElement = webDriver.findElement(By.xpath("/html"));
              String content = webElement.getAttribute("outerHTML");
              String currentUrl = webDriver.getCurrentUrl();
              page.setRawText(content);
              page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, currentUrl)));
              page.setUrl(new PlainText(currentUrl));
              page.setRequest(request);
              return page;
          } finally {
              // Without this, any failure above would leak the driver and the pool
              // would eventually run dry.
              webDriverPool.returnToPool(webDriver);
              if (interrupted) {
                  Thread.currentThread().interrupt();
              }
          }
      }

      /** No-op: concurrency is bounded by the {@link WebDriverPool} capacity instead. */
      @Override
      public void setThread(int thread) {
      }
  }

  这里的扩展性主要体现在我加入的SeleniumAction接口上:既可以在SeleniumDownloader初始化时配置一个全局的SeleniumAction,也可以为每个Request单独配置对应的SeleniumAction。SeleniumAction接口如下:

  /**
   * Callback that runs custom Selenium steps against a driver.
   */
  public interface SeleniumAction {

  /**
   * Runs this action against the given driver.
   *
   * @param driver the driver to operate on
   */
  void execute(WebDriver driver);

  }

  它会获得一个WebDriver实例,你可以在里面进行任意的Selenium操作。

  以上内容为大家介绍了自动化测试中的爬虫框架WebMagic源码分析之Selenium,本文由多测师亲自撰写,希望对大家有所帮助。了解更多自动化测试相关知识:https://www.aichudan.com/xwzx/

联系电话

17727591462

返回顶部