多测师是一家拥有先进的教学理念,强大的师资团队,业内好评甚多的接口自动化测试培训机构!

17727591462

联系电话

您现在所在位置:接口自动化测试培训 > 新闻资讯

利用Selenium爬取淘宝美食网页内容-自动化测试

更新时间:2022-03-23 09:47:57 作者:多测师 浏览:223

  开发工具

  Python版本:3.6

  相关模块:

  import re

  from selenium import webdriver

  from selenium.common.exceptions import TimeoutException

  from selenium.webdriver.common.by import By

  from selenium.webdriver.support.ui import WebDriverWait

  from selenium.webdriver.support import expected_conditions as EC

  from pyquery import PyQuery as pq

  from config import *

  config.py 文件(保存以下配置项,主程序通过 from config import * 导入)

  # Command-line arguments handed to the PhantomJS driver via ``service_args``.
  # Judging by the flag names, they turn image loading off and the disk cache
  # on -- NOTE(review): confirm against the PhantomJS command-line reference.
  SERVICE_ARGS = ['--load-images=false', '--disk-cache=true']

  # Search term typed into the Taobao search box ('美食' means "food").
  KEYWORD = '美食'

利用Selenium爬取淘宝美食网页内容-自动化测试

  全部代码

  '''

  import re

  from selenium import webdriver

  from selenium.common.exceptions import TimeoutException

  from selenium.webdriver.common.by import By

  from selenium.webdriver.support.ui import WebDriverWait

  from selenium.webdriver.support import expected_conditions as EC

  from pyquery import PyQuery as pq

  from config import *

  # Browser session shared by every function in this script: PhantomJS started
  # with the service arguments from config.
  # (Alternative left by the original author: driver = webdriver.Chrome())
  driver = webdriver.PhantomJS(service_args=SERVICE_ARGS)

  # Original author's note: with this window size set the page content can be
  # scraped; without it a TimeOut error occurs.
  driver.set_window_size(1400, 900)

  # Explicit-wait helper bound to the driver, with a timeout of 10 seconds.
  wait = WebDriverWait(driver, 10)

  def search():
      """Search Taobao for KEYWORD and scrape the first page of results.

      Loads the Taobao home page, types KEYWORD into the search box, clicks
      the search button, waits for the pager's "total" element and then
      calls ``get_products()`` for the first result page.

      Returns:
          The text of the pager's "total" element.

      If any wait (including the one inside ``get_products()``) raises
      ``TimeoutException``, 'TimeOut' is printed and the whole search is
      retried by calling this function again.
      """
      print('正在搜索')

      # Locators for the three page elements this function waits on.
      box_locator = (By.CSS_SELECTOR, '#q')
      button_locator = (By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')
      total_locator = (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')

      try:
          driver.get('http://www.taobao.com')
          search_box = wait.until(EC.presence_of_element_located(box_locator))
          search_button = wait.until(EC.element_to_be_clickable(button_locator))

          search_box.send_keys(KEYWORD)
          search_button.click()

          total_element = wait.until(EC.presence_of_element_located(total_locator))
          get_products()
          return total_element.text
      except TimeoutException:
          print('TimeOut')
          return search()

  def next_page(page_number):
      """Jump to result page ``page_number`` and scrape it.

      Clears the pager's page-number input, enters ``page_number``, clicks
      the pager's submit button, waits until the active pager item shows
      that number, and then calls ``get_products()``.

      Args:
          page_number: Page to jump to; it is sent to the input as given and
              compared against the active pager item as ``str(page_number)``.

      If any wait (including the one inside ``get_products()``) raises
      ``TimeoutException``, 'TimeOut' is printed and the jump is retried by
      calling this function again with the same page number.
      """
      print('正在翻页', page_number)

      # Locators for the pager widgets this function interacts with.
      page_input_locator = (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > input')
      confirm_locator = (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit')
      active_item_locator = (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > ul > li.item.active > span')

      try:
          page_input = wait.until(EC.presence_of_element_located(page_input_locator))
          confirm_button = wait.until(EC.element_to_be_clickable(confirm_locator))

          page_input.clear()
          page_input.send_keys(page_number)
          confirm_button.click()

          # The jump has completed once the highlighted pager item shows the
          # requested page number.
          wait.until(EC.text_to_be_present_in_element(active_item_locator, str(page_number)))
          get_products()
      except TimeoutException:
          print('TimeOut')
          next_page(page_number)

  def get_products():
      """Print one dict per product listed on the current result page.

      Waits until at least one product item is present, parses the driver's
      current page source with PyQuery, and prints a dict with the keys
      'image', 'price', 'deal', 'title', 'shop' and 'location' for each item.
      A ``TimeoutException`` from the wait is not handled here.
      """
      item_selector = '#mainsrp-itemlist .items .item'
      wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, item_selector)))

      page = pq(driver.page_source)
      for entry in page(item_selector).items():
          deal_text = entry.find('.deal-cnt').text()
          print({
              'image': entry.find('.pic .img').attr('src'),
              'price': entry.find('.price').text(),
              # Drops the last three characters of the deal-count text --
              # presumably a fixed textual suffix after the number; verify
              # against the live page.
              'deal': deal_text[:-3],
              'title': entry.find('.title').text(),
              'shop': entry.find('.shop').text(),
              'location': entry.find('.location').text(),
          })

  def main():
      """Scrape every result page for the configured keyword, then quit the browser.

      Calls ``search()`` for the first page, extracts the page count from the
      pager text it returns, and visits pages 2..N with ``next_page()``.
      Any exception is printed instead of propagated, and the browser
      session is always shut down afterwards.
      """
      try:
          total_text = search()

          # Pull the first run of digits out of the pager text. The pattern
          # must be r'(\d+)': without the backslash it looks for literal "d"
          # characters, never matches, and the scrape stops after page 1.
          match = re.search(r'(\d+)', total_text)
          if match is None:
              raise ValueError(
                  'could not find a page count in pager text: %r' % (total_text,)
              )
          total_pages = int(match.group(1))

          # Page 1 was already scraped by search().
          for num in range(2, total_pages + 1):
              next_page(num)
      except Exception as e:
          # Top-level boundary of the script: report the error and fall
          # through to the cleanup below.
          print(e)
      finally:
          # quit() ends the whole WebDriver session, including the browser
          # process; close() would only close the current window.
          driver.quit()


  if __name__ == '__main__':
      main()

  以上内容为大家介绍了自动化测试中利用Selenium爬取淘宝美食网页内容的方法,本文由多测师亲自撰写,希望对大家有所帮助。了解更多自动化测试相关知识:https://www.aichudan.com/xwzx/

联系电话

17727591462

返回顶部