// visitor_test.go (1.3 KB)
  1. package qianlima
  2. import (
  3. "fmt"
  4. "git.aionnect.com/hello-go/spider/common"
  5. "git.aionnect.com/hello-go/spider/spiders"
  6. "git.aionnect.com/hello-go/spider/spiders/qianlima/items"
  7. "net/http"
  8. "testing"
  9. )
  10. func TestVisitorSpider(t *testing.T) {
  11. // 从起始页开始执行爬虫
  12. params := &items.PagingParams{PagingNo: 1, DaysLimit: 7}
  13. startingUrl := fmt.Sprintf(TargetUrl, params.PagingNo)
  14. target := &common.Target{
  15. Key: VisitorPaging,
  16. Method: http.MethodPost,
  17. URL: startingUrl,
  18. Referer: RefererUrl,
  19. Item: params,
  20. }
  21. spiders.Run(target,
  22. NewVisitorPagingSpider(),
  23. NewVisitorDetailSpider(),
  24. )
  25. fmt.Println("Done!")
  26. }
  27. func TestVisitorDetailSpider(t *testing.T) {
  28. // 单独测试详情页爬虫,主要为了测试不同网页编码是否解析正常
  29. // 未传Item参数,不会写数据库
  30. targets := []*common.Target{
  31. {Key: VisitorDetail, Method: http.MethodGet, URL: "http://www.qianlima.com/zb/detail/20200710_186393093.html"}, // UTF8
  32. {Key: VisitorDetail, Method: http.MethodGet, URL: "http://www.qianlima.com/zb/detail/20200705_185702517.html"}, // GBK
  33. {Key: VisitorDetail, Method: http.MethodGet, URL: "http://www.qianlima.com/zb/detail/20200703_185597771.html"}, // GBK
  34. }
  35. for _, target := range targets {
  36. spiders.Run(target,
  37. NewVisitorDetailSpider(),
  38. )
  39. }
  40. }