import re
import requests
from bs4 import BeautifulSoup
import lxml
# Page whose book entries are scraped below.
url = 'https://www.zxcs.info/sort/3/page/2'
# A timeout keeps the script from hanging forever on an unresponsive server.
html = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx response instead of parsing an error page.
html.raise_for_status()
soup = BeautifulSoup(html.text, 'lxml')
# <a> elements that are direct children of a <dt> directly under #plist.
data = soup.select('#plist>dt>a')
print(data)
for item in data:
    # Title/link record for the current anchor.
    # NOTE(review): the record is rebound on every iteration and never read
    # afterwards -- presumably it was meant to be collected or passed on;
    # confirm the intent before relying on it.
    resuls = {
        'title': item.get_text(),
        'link': item.get('href'),
    }
# Matches any single HTML/XML tag.  Compiled once instead of on every call.
# re.S is kept from the original pattern; it has no effect here because the
# pattern contains no ".".
_TAG_PATTERN = re.compile(r'<[^>]+>', re.S)


def html2text(text):
    """Return *text* with every HTML tag removed.

    Args:
        text: Markup to strip.  Either a string, or a list/tuple of items
            whose ``str()`` is markup (for example the result of a
            BeautifulSoup ``select`` call, which is a list subclass); the
            items are rendered with ``str()`` and joined with newlines.
            Any other object is converted with ``str()`` first.

    Returns:
        The input with all ``<...>`` tags removed.  Character entities such
        as ``&amp;`` are left untouched.
    """
    if isinstance(text, str):
        markup = text
    elif isinstance(text, (list, tuple)):
        # select() returns a list of elements, not a string; passing that
        # straight to re.sub raises TypeError, so render each element first.
        markup = '\n'.join(str(part) for part in text)
    else:
        markup = str(text)
    return _TAG_PATTERN.sub('', markup)


def pages(link):
    """Fetch a detail page and print the tag-stripped text of three sections.

    The sections are the ``h1`` inside ``div.book-info``, the
    ``div.book-info-detail`` block, and the anchors directly under
    ``#download``.

    Args:
        link: URL of the page to fetch.

    Returns:
        None.  The extracted text is written to stdout.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    # A timeout keeps the call from blocking forever on a stalled server.
    info = requests.get(link, timeout=10)
    info.raise_for_status()
    soups = BeautifulSoup(info.text, 'lxml')
    name = soups.select('body > div.wrap > div.book-detail-wrap.center990 > div.book-information.cf > div.book-info > h1')
    jieshao = soups.select('body > div.wrap > div.book-detail-wrap.center990 > div.book-content-wrap.cf > div > div.book-info-detail')
    down = soups.select('#download > a')
    print(html2text(name))
    print(html2text(jieshao))
    print(html2text(down))


# html2text is now defined before this call; previously the call ran first
# and failed with NameError.  pages() returns None, so its result is no
# longer printed.
if __name__ == '__main__':
    pages('https://www.zxcs.info/post/14929')