page contents

比较爬虫用的语言Python与Go

Python是我比较喜欢的语言,莫名的喜欢,对Python的学习可能起初是敲错了网址开始的,哈哈哈~ 工作的任务从一个网站后台做登录、爬取数据,写入服务器Redis中,同事认为我会用PHP来写,哼!...

attachments-2021-07-GdM7QHJm60ebb3b4ce59f.png

Python是我比较喜欢的语言,莫名的喜欢,对Python的学习可能起初是敲错了网址开始的,哈哈哈~

工作的任务是登录一个网站后台、爬取数据,并写入服务器的Redis中,同事认为我会用PHP来写,哼!让你猜到那该多没意思,于是乎有了如下Python的代码,你看,50多行就搞定了。

 1 #!/usr/bin/python3
 2 import requests
 3 import re
 4 import redis
 5 from pyquery import PyQuery as pq
 6 
 7 loginUrl = 'https://manage.xxx.com.cn/home/login'
 8 userName = 'xxx'
 9 passWord = 'xxx'
10 
11 redisServer = '192.168.0.2'
12 redisPort = 6379
13 redisPass = ''
14 
15 productList = {'椰油':'CL_Spot','咖啡':'COFFEE','工业铜':'COPPER'}
16 volumeList = {'CL_Spot':[0, 0], 'COFFEE':[0, 0], 'COPPER':[0, 0]}
17 
18 def main():
19     jsessionid = getCookie()
20     doLogin(jsessionid)
21     dataUrl = 'https://manage.xxx.cn/?pageNo=1&pageSize=100'
22     cookies = {'JSESSIONID': jsessionid}
23     r = requests.get(dataUrl, cookies = cookies)
24     dom = pq(r.text)
25     lines = dom('table').eq(1).find('tr').items()
26     for line in lines:
27         line = re.sub(r'<!--.*-->', '', str(line))
28         pattern = re.compile(r'<td>(.*?)</td>')
29         group = pattern.findall(line)
30         if not group:
31             continue
32         productCode = productList[group[3]]
33         if group[6] == '':
34             volumeList[productCode][0]+= int(group[7]) * int(group[8])
35         if group[6] == '':
36             volumeList[productCode][1]+= int(group[7]) * int(group[8])
37 
38     redisClient = redis.Redis(host=redisServer, port=redisPort, password=redisPass)
39     for x in volumeList:
40         keyUp = 'redis_order_count_u_%s' % x
41         keyDown = 'redis_order_count_d_%s' % x
42         redisClient.set(keyUp, int(volumeList[x][0]))
43         redisClient.set(keyDown, int(volumeList[x][1]))
44 
45 def getCookie():
46     ua = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
47     r = requests.get(loginUrl, headers = ua)
48     return r.cookies['JSESSIONID']
49 
50 def doLogin(jsessionid):
51     param = {'userName': userName, 'password': passWord}
52     cookies = {'JSESSIONID': jsessionid}
53     requests.post(loginUrl, data = param, cookies = cookies)
54     
55 
56 if __name__ == '__main__':
57     main()

另一个服务也有这个需求,于是用最近在看的Golang又实现了一次,瞧,写了100多行。

  1 package main
  2 
  3 import (
  4     "fmt"
  5     "net/http"
  6     "net/url"
  7     "os"
  8     "strings"
  9     "strconv"
 10     "gopkg.in/redis.v4"
 11     "github.com/PuerkitoBio/goquery"
 12 )
 13 
 14 var loginUrl string = "https://manage.xxx.com.cn/home/login"
 15 var dataUrl string = "https://manage.xxx.com.cn/?pageNo=1&pageSize=100"
 16 var userName string = "xxx"
 17 var passWord string = "xxx"
 18 var redisServer string = "192.168.1.2"
 19 var redisPort string = "6379"
 20 var redisPass string = ""
 21 var redisDB   int = 0
 22 
 23 func main() {
 24     productList := make(map[string] string)
 25     productList["椰油"] = "CL_Spot"
 26     productList["咖啡"] = "COFFEE"
 27     productList["工业铜"] = "COPPER"
 28     volumeList := make(map[string] int)
 29     volumeList["u_CL_Spot"] = 0
 30     volumeList["d_CL_Spot"] = 0
 31     volumeList["u_COFFEE"] = 0
 32     volumeList["d_COFFEE"] = 0
 33     volumeList["u_COPPER"] = 0
 34     volumeList["d_COPPER"] = 0
 35     jsessionid := getCookie()
 36     doLogin(jsessionid)
 37 
 38     request, err := http.NewRequest("GET", dataUrl, nil)
 39     request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
 40     client := &http.Client{}
 41     response, err := client.Do(request)
 42     if err != nil {
 43         fmt.Println(err.Error())
 44         os.Exit(0)
 45     }
 46     defer response.Body.Close()
 47     doc, err := goquery.NewDocumentFromReader(response.Body)
 48     doc.Find("table").Eq(1).Find("tr").Each(func(i int, tr *goquery.Selection) {
 49         td := tr.Find("td")
 50         name := td.Eq(3).Text()
 51         dir := td.Eq(6).Text()
 52         if val, ok := productList[name]; ok {
 53             buyNum, _ := strconv.Atoi(td.Eq(7).Text())
 54             buyUnit, _ := strconv.Atoi(td.Eq(8).Text())
 55             num :=  buyNum * buyUnit
 56             cacheKey := ""
 57             if dir == "" {
 58                 cacheKey = fmt.Sprintf("u_%s", val)
 59             } else if dir == "" {
 60                 cacheKey = fmt.Sprintf("d_%s", val)
 61             }
 62             volumeList[cacheKey] += num
 63         }
 64     })
 65     redisClient := redis.NewClient(&redis.Options{
 66         Addr:     fmt.Sprintf("%s:%s", redisServer, redisPort),
 67         Password: redisPass,
 68         DB:       redisDB,
 69     })
 70     for k, v := range volumeList {
 71         strKey := fmt.Sprintf("redis_order_count_%s", k)
 72         redisClient.Set(strKey, int(v), 0)
 73     }
 74     fmt.Println("puti volume get success")
 75 }
 76 
 77 func getCookie() string {
 78     jsessionid := ""
 79     response, err := http.Get(loginUrl)
 80     if err != nil {
 81         fmt.Println(err.Error())
 82         os.Exit(0)
 83     }
 84     defer response.Body.Close()
 85     for _, val := range response.Cookies() {
 86         if val.Name == "JSESSIONID" {
 87             jsessionid = val.Value
 88         }
 89     }
 90     return jsessionid
 91 }
 92 
 93 func doLogin(jsessionid string) bool {
 94     data := url.Values{}
 95     data.Set("userName", userName)
 96     data.Add("password", passWord)
 97     request, _ := http.NewRequest("POST", loginUrl, strings.NewReader(data.Encode()))
 98     request.Header.Add("Content-Type", "application/x-www-form-urlencoded")
 99     request.Header.Add("Content-Length", strconv.Itoa(len(data.Encode())))
100     request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
101     client := &http.Client{}
102     response, err := client.Do(request)
103     if err != nil {
104         fmt.Println(err.Error())
105         os.Exit(0)
106     }
107     defer response.Body.Close()
108     return true
109 }

Python从实现到上线,半天的功夫就搞定了;Go足足搞了一整天,蹩脚又陌生的语法让我学到了很多知识点,最后在Mac上交叉编译、再放到Linux上执行,也给我上了一课。

更多相关技术内容咨询欢迎前往并持续关注六星社区了解详情。

想高效系统的学习Python编程语言,推荐大家关注一个微信公众号:Python编程学习圈。每天分享行业资讯、技术干货供大家阅读,关注即可免费领取整套Python入门到进阶的学习资料以及教程,感兴趣的小伙伴赶紧行动起来吧。

attachments-2022-06-9MuTRHG762a05a1879f19.jpeg

  • 发表于 2021-07-12 11:16
  • 阅读 ( 590 )
  • 分类:Golang

0 条评论

请先 登录 后评论
轩辕小不懂
轩辕小不懂

2403 篇文章

作家榜 »

  1. 轩辕小不懂 2403 文章
  2. 小柒 1658 文章
  3. Pack 1135 文章
  4. Nen 576 文章
  5. 王昭君 209 文章
  6. 文双 71 文章
  7. 小威 64 文章
  8. Cara 36 文章