中文字幕av专区_日韩电影在线播放_精品国产精品久久一区免费式_av在线免费观看网站

溫馨提示×

溫馨提示×

您好,登錄后才能下訂單哦!

密碼登錄×
登錄注冊×
其他方式登錄
點擊 登錄注冊 即表示同意《億速云用戶服務條款》

goquery 爬蟲實踐

發布時間:2020-05-31 03:42:11 來源:網絡 閱讀:752 作者:xingzhehxiang 欄目:編程語言
/*獲取URL范例*/
/*
Find 查找獲取當前匹配的每個元素的后代
Eq 選擇第幾個
Attr 獲取對應的標簽屬性
AttrOr 獲取對應的標簽屬性。這個可以設置第二個參數。獲取的默認值,如果獲取不到默認調用對應默認值
Each 遍歷每一個元素
Text 獲取當前對應的文本
Html 獲取當前對象的標簽
AddClass 添加 class 不過用來抓取有點雞肋不知道為何要寫這個
Children 返回所有子元素
Filter 過濾標簽元素
Prev 獲取上一個元素
Next 獲取下一個元素
*/
package?main

import?(
????"fmt"
????"log"
????"os"
????"regexp"
????"strconv"

????"github.com/PuerkitoBio/goquery"
)

func?getdata(ins?int,?ch?chan?int)?{
????url?:=?""
????if?ins?==?1?{
????????url?=?"https://colobu.com/categories/Go"
????}?else?{
????????url?=?"https://colobu.com/categories/Go/page/"?+?strconv.Itoa(ins)?+?"/"
????}
????doc,?err?:=?goquery.NewDocument(url)
????if?err?!=?nil?{
????????log.Fatal(err)
????}
????//??<a?class="article-title"?href="/2019/06/01/packet-capture-injection-and-analysis-gopacket/">[譯]利用?gopackage?進行包的捕獲、注入和分析</a>
????doc.Find(".article-title").Each(func(i?int,?s?*goquery.Selection)?{
????????a,?_?:=?s.Attr("href")
????????text?:=?s.Text()
????????a?=?"https://colobu.com"?+?a
????????//htmls,?_?:=?s.Html()
????????fmt.Println("")
????????fmt.Println("")

????????fmt.Println("??地址:"?+?a)
????????fmt.Println("??標題:"?+?text)
????????/*text?=?strings.ReplaceAll(text,?":",?"?")
????????text?=?strings.ReplaceAll(text,?"/",?"?")
????????text?=?strings.ReplaceAll(text,?"\\",?"?")
????????text?=?strings.ReplaceAll(text,?"?",?"?")
????????text?=?strings.ReplaceAll(text,?"*",?"?")?*/
????????reg?:=?regexp.MustCompile(`:|\?|/|\*|<|>|"`)
????????tilte?:=?reg.ReplaceAllString(text,?"?")
????????docm,?err?:=?goquery.NewDocument(a)
????????if?err?!=?nil?{
????????????log.Fatal(err)
????????}
????????sstext?:=?""
????????docm.Find(".article-entry").Each(func(ii?int,?ss?*goquery.Selection)?{
????????????sstext?=?ss.Text()

????????})
????????//fmt.Println("??正文:"?+?sstext)
????????file,?_?:=?os.OpenFile("./爬蟲/第"+strconv.Itoa(ins)+"頁??"+strconv.Itoa(i+1)+"篇??"+tilte+"頁爬蟲.txt",?os.O_RDWR|os.O_TRUNC|os.O_CREATE,?0666)
????????defer?file.Close()
????????file.Write([]byte(text?+?"\n正文:\n"?+?sstext?+?"\n\n\n"))
????????fmt.Println("??----------------------------------------------------------------------------?")

????})
????ch?<-?ins
}

func?Doing(s,?e?int)?{
????ch?:=?make(chan?int)
????for?i?:=?s;?i?<=?e;?i++?{
????????go?getdata(i,?ch)
????}
????for?i?:=?s;?i?<=?e;?i++?{
????????n?:=?<-ch
????????fmt.Printf("第%d頁爬取完畢\n",?n)
????}
}
func?main()?{
????var?start,?end?int
????fmt.Println("輸入起始頁")
????fmt.Scan(&start)
????fmt.Println("輸入終止頁")
????fmt.Scan(&end)
????Doing(start,?end)
}
package?main

import?(
????"fmt"
????"io"
????"net/http"
????"os"
????"strconv"
)

// HttpGet fetches url with the default HTTP client and returns the complete
// response body as a string.
//
// The URL is echoed to stdout before the request is sent, and "讀取完畢" is
// printed once the body has been read to the end. A transport error, or an
// error while reading the body, is returned as err with an empty res; a body
// that breaks off mid-stream is therefore never reported as a success. The
// HTTP status code is not inspected.
func HttpGet(url string) (res string, err error) {
	fmt.Println(url)

	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// io.ReadAll grows one buffer instead of re-copying the accumulated
	// string on every 4 KiB chunk, and reports every non-EOF read error.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	fmt.Println("讀取完畢")
	return string(body), nil
}
func?working(start,?end?int)?{
????fmt.Printf("正在爬取%d頁面到%d頁",?start,?end)
????for?i?:=?start;?i?<=?end;?i++?{
????????url?:=?"http://tieba.baidu.com/f?kw=%E5%88%AB%E5%85%8B&ie=utf-8&pn="?+?strconv.Itoa((i-1)*50)
????????//resp,?err?:=?http.Get(url)
????????result,?err?:=?HttpGet(url)
????????if?err?!=?nil?{
????????????fmt.Println(err)
????????????continue
????????}
????????fmt.Println(result)
????????file,?err?:=?os.Create("第"?+?strconv.Itoa(i)?+?"頁面.html")
????????if?err?!=?nil?{
????????????fmt.Println(err)
????????}
????????file.WriteString(result)
????????file.Close()
????}
}

func?main()?{
????var?start,?end?int
????fmt.Println("請輸入爬取的起始頁(》=1):")
????fmt.Scan(&start)
????fmt.Println("請輸入爬取的結束頁(》=start):")
????fmt.Scan(&end)

????working(start,?end)
}
package?main

import?(
????"fmt"
????"io"
????"net/http"
????"os"
????"strconv"
)

// HttpGet fetches url with the default HTTP client and returns the complete
// response body as a string.
//
// The URL is echoed to stdout before the request is sent. A transport error,
// or an error while reading the body, is returned as err with an empty res;
// a body that breaks off mid-stream is therefore never reported as a
// success. The HTTP status code is not inspected.
func HttpGet(url string) (res string, err error) {
	fmt.Println(url)

	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// io.ReadAll grows one buffer instead of re-copying the accumulated
	// string on every 4 KiB chunk, and reports every non-EOF read error.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}

func?getdata(i?int,?ch?chan?int)?{
????url?:=?"http://tieba.baidu.com/f?kw=%E5%88%AB%E5%85%8B&ie=utf-8&pn="?+?strconv.Itoa((i-1)*50)
????//resp,?err?:=?http.Get(url)
????fmt.Println("第"?+?strconv.Itoa(i)?+?"頁面.html")
????result,?err?:=?HttpGet(url)
????if?err?!=?nil?{
????????fmt.Println(err)
????????//continue
????}
????//fmt.Println(result)
????file,?err?:=?os.Create("第"?+?strconv.Itoa(i)?+?"頁面.html")
????if?err?!=?nil?{
????????fmt.Println(err)
????}
????file.WriteString(result)
????file.Close()
????ch?<-?i
}

func?working(s,?e?int)?{
????ch?:=?make(chan?int)
????for?i?:=?s;?i?<=?e;?i++?{
????????go?getdata(i,?ch)
????}
????for?i?:=?s;?i?<=?e;?i++?{
????????n?:=?<-ch
????????fmt.Printf("第%d頁爬取完畢\n",?n)
????}
}

func?main()?{
????var?start,?end?int
????fmt.Println("請輸入爬取的起始頁(》=1):")
????fmt.Scan(&start)
????fmt.Println("請輸入爬取的結束頁(》=start):")
????fmt.Scan(&end)

????working(start,?end)

}


向AI問一下細節

免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。

AI

呼图壁县| 大方县| 五台县| 金川县| 航空| 城市| 黎平县| 太和县| 叶城县| 遂昌县| 武隆县| 屯门区| 密山市| 炎陵县| 加查县| 龙江县| 平舆县| 香格里拉县| 获嘉县| 泰安市| 内乡县| 新干县| 吉安县| 射阳县| 锡林郭勒盟| 镇赉县| 资阳市| 会东县| 浮梁县| 阿克| 汉阴县| 新化县| 无棣县| 运城市| 堆龙德庆县| 九龙县| 五寨县| 沂源县| 贡觉县| 沧源| 泰州市|