您好,登錄后才能下訂單哦!
前言:
某天突發奇想,想要實現一個農歷生日提醒。這個時候有了如下的大概思路:
獲取農歷 ——> 匹配 ——> 提醒
發現實現這個需求最主要的在獲取農歷這一塊,后邊兩個不是什么巨大挑戰。遂查閱一番資料,發現數據庫可以實現陽歷轉陰歷功能。在數據庫這一塊,我是在是小菜,看不懂存儲過程。蛋疼!直接放棄改用其他思路。思考了許久,想到可以通過爬蟲爬取現成的日歷信息,存庫再進行匹配。所以,現在思路如下:
爬取農歷存庫 ——> 匹配姓名表 ——> 提醒
一、爬取日歷網站
剛開始是按照每年一個表的思路去爬,代碼及注釋如下:
首先是建表:
(文件名:reptile\CreateDb.py)
# -*- coding:utf-8 -*- import MySQLdb import os #將敏感信息寫入環境變量 通過export去設置value MYSQLDB_HOST=os.environ.get('MYSQLDB_HOST') MYSQLDB_USER=os.environ.get('MYSQLDB_USER') MYSQLDB_PASSWD=os.environ.get('MYSQLDB_PASSWD') db = MySQLdb.connect( host=MYSQLDB_HOST, port=3306, user=MYSQLDB_USER, passwd = MYSQLDB_PASSWD, db='Calendar', charset = "utf8", ) cursor = db.cursor() #數據庫插入 def Insert_mysql(sql): cursor.execute(sql) db.commit() #數據庫查詢 def Inquire_mysql(sql): cursor.execute(sql) request = cursor.fetchall() return request if __name__ == "__main__": st_sql = "show tables;" cursor.execute(st_sql) request = cursor.fetchall() for year in range(1900,2050): if year in request: print "[*]%d is in database!" else: print "[!]%s No in the Database,create now." % year ct_sql = """CREATE TABLE `%d` ( `DAY` date NOT NULL , `WEEK` varchar(50) NULL , `CONSTELLATON` varchar(50) NULL , `FESTIVAL` varchar(24) NULL , `YEAR` varchar(24) NULL , `LUNARCALENDAR` varchar(60) NULL , `LUNNAR` varchar(24) NULL , `ERSHIBASU` varchar(24) NULL , `JIAZI` varchar(24) NULL , PRIMARY KEY (`DAY`) ) ENGINE=InnoDB DEFAULT CHARACTER SET=utf8 ;""" % year try: cursor.execute(ct_sql) db.commit() print "[.] %s OK!" % year except: print "Error:unable to fecth data" db.close()
效果:
接著爬取:
(文件名:reptile\Spider.py)
#coding:utf-8 import re,urllib2 from bs4 import BeautifulSoup from urllib import urlencode class SiteData: def __init__(self,url): self.Url = url def Data(self): #偽裝頭 values = {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,p_w_picpath/w ebp,*/*;q=0.8', 'Accept-Language':'zh-CN,zh;q=0.8', 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36' } SiteUrl = urllib2.Request(self.Url,headers=values) WebSite = urllib2.urlopen(SiteUrl) HtmlData = WebSite.read() WebSite.close() return HtmlData def Find(self): #BeautifulSoup篩選出我要的內容 HtmlData = self.Data() HtmlSoup = BeautifulSoup(HtmlData,"html5lib") FindALL = HtmlSoup.find_all("a") List=[] for i in FindALL: filter = i.encode('gb2312').replace(' ',' ').replace(';','') if "#" in filter: ReplaceText = filter.replace('<a href="#" onclick="return(false)" title="','') Extract2 = re.sub(r"\<.*\>","",ReplaceText) Extract1 = re.sub(r"[0-9]+\:[0-9]+","",Extract2) Extract = Extract1.replace(' 0-','').replace('">','') List.append(Extract) else: pass return List
開始爬取:
(文件名:reptile\Run.py)
#coding:utf-8 import time from CreateDb import db,cursor #將數據庫操作的一些方法加進來 from Spider import SiteData #將爬蟲操作的方法加進來 def ListMark(list): LIST = [] items = list.split('\n') List = [items[1],items[2],items[3],items[4],items[6],items[7],items[8]] for i in List: item,value = i.split(':') LIST.append(value) return LIST def get_item(text): item,value = text.split(' ') return item def get_value(text): item,value = text.split(' ') return value def UTF(text): TEXT = text.decode('gbk').encode('utf8') return TEXT if __name__ == '__main__': for year in range(1900,2050): sql = "select DAY from Calendar.%s order by DAY desc limit 1" % year cursor.execute(sql) request = cursor.fetchall() print "[!]process %s" % year try: Month = str(request).split(',')[1] except: Month = str(request) if Month == "()": print "[!]No data in Table! Crawling now.." for m in range(1,13): url = "http://www.nongli.com/item3/rili_%s-%s.htm" % (year,m) OpenSite = SiteData(url) returnList = OpenSite.Find() for i in returnList: List = ListMark(i) Day = get_item(List[0]) Week = get_value(List[0]) Constellation = List[1] Festival = List[2] Year = List[3][0:4] Lunarcalendar = List[3][4:] Lunnar = List[4] Ershibasu = List[5] Jiazi = List[6] Insert_sql = "insert into Calendar.%s values('%s','%s','%s','%s','%s','%s','%s','%s','%s')" \ % (year,Day,UTF(Week),UTF(Constellation),UTF(Festival),\ UTF(Year),UTF(Lunarcalendar),UTF(Lunnar),UTF(Ershibasu),UTF(Jiazi)) try: cursor.execute(Insert_sql) db.commit() print "[ ]Insert %s data ..." % Day except: print "[!]Insert %s Error!!!" % Day db.rollback() time.sleep(0.1) try: if Month == 12: print "[ ] Have data in Table." else: NodataMoth = range(int(Month)+1,13) for M in NodataMoth: url = "http://www.nongli.com/item3/rili_%s-%s.htm" % (year,M) OpenSite = SiteData(url) returnList = OpenSite.Find() for i in returnList: List = ListMark(i) Day = get_item(List[0]) Week = get_value(List[0]) Constellation = List[1] Festival = List[2] Year = List[3][0:4] Lunarcalendar = List[3][4:] Lunnar = List[4] Ershibasu = List[5] Jiazi = List[6] Insert_sql = "insert into Calendar.%s values('%s','%s','%s','%s','%s','%s','%s','%s','%s')" \ % (year,Day,UTF(Week),UTF(Constellation),UTF(Festival),\ UTF(Year),UTF(Lunarcalendar),UTF(Lunnar),UTF(Ershibasu),UTF(Jiazi)) try: cursor.execute(Insert_sql) print "[ ]Insert %s data ..." % Day db.commit() except: print "[!]Insert %s Error!!!" % Day db.rollback() time.sleep(0.1) except: print "[!]Unknown Error!" db.close()
效果:
平均1條6條數據 從1900-2049年需要2個半小時以上時間。
爬完仔細一下,一個表才365條記錄,1900-2049才150年 完全可以合成一張表。
遂,寫腳本,將所有表合體,現在所有日期數據都在數據庫里了:
二、將想要提醒的人加入一張表,用于匹配
姓名表比較簡單,主要有姓名,性別,日期,農歷日期。一些星座等其他信息可以根據需要自己添加。一個字段一個字段添加太過蛋疼,可以用數據庫觸發器自動填充一些,但是。。我!不!會!遂,寫了個腳本用于添加:
(文件名:add_use.py)
#coding:utf-8 import re,sys from reptile.CreateDb import Insert_mysql,Inquire_mysql def UTF(text): TEXT = text.decode('UTF-8').encode('UTF-8') return TEXT #新增用戶的方法 def add(Name,Sex,Birthday): #自動新增ID,匹配最后一個ID,新ID是最后一個ID+1 Last_id_sql = "SELECT Tb_use.USE_ID FROM Calendar.`Tb_use` \ ORDER BY Tb_use.USE_ID DESC LIMIT 1" Last_id = Inquire_mysql(Last_id_sql) try: m = re.search(r"[0-9]+",str(Last_id)) n = m.group() id = int(n)+1 except: id = 1 #如果填入為1 則為男性 if Sex == "1": SEX = "男" else: SEX = "女" birthday_sql = "SELECT Calendar.Lunarcalendar FROM Calendar WHERE \ Calendar.TB_DAY = '%s';" % Birthday #匹配日歷表 自動填充農歷信息 Lunarcalendar = Inquire_mysql(birthday_sql) Lunar = Lunarcalendar[0][0].encode('UTF-8') Insert_sql ="INSERT INTO `Tb_use` (`USE_ID`,`NAME`,`SEX`,`TB_DAY`,\ `LUNARCALENDAR`) VALUES ('%s','%s','%s','%s','%s');" % \ (id,UTF(Name),UTF(SEX),Birthday,Lunar) Insert_mysql(Insert_sql) print "[!]新增新記錄,編號為%s\n姓名:%s 性別:%s \n生日:%s 農歷:%s" \ %(id,Name,SEX,Birthday,Lunar) #刪除用戶的方法可以根據新增用戶方法去修改,這里pass def DEL(self): pass #將方法映射到字典 def run(type,Name=None,Sex=None,Birthday=None): RUN = { 'add':lambda:add(Name,Sex,Birthday), 'del':lambda:DEL() } return RUN[type]() if __name__ == "__main__": #實現類似命令行交互式效果 while True: echo = raw_input(">>") Split = echo.split(' ') if echo == "exit": print "Exit!" break elif echo == "": pass #如果輸入信息正確,則運行方法 elif Split[0] == "add" or Split[0] == "del": try: a = run(Split[0],Split[1],Split[2],Split[3]) except: pass else: print "Please Use 'add' to Add user."
三、匹配后發送提醒
(文件名:Run.py)
#coding:utf-8 import time,json,smtplib,os from reptile.CreateDb import Insert_mysql,Inquire_mysql from email.mime.text import MIMEText from email.MIMEMultipart import MIMEMultipart from email.MIMEBase import MIMEBase #今天的日期 Today = time.strftime('%Y-%m-%d',time.localtime(time.time())) #從數據庫從獲取今天的ID Today_Sql = "SELECT Calendar.ID FROM `Calendar` WHERE TB_DAY='%s';" % Today TodayID = Inquire_mysql(Today_Sql)[0][0] #這個方法用于獲取今天日期 def Get_today(id): Sql = "SELECT Calendar.TB_DAY FROM `Calendar` WHERE ID='%s';" % id Today = Inquire_mysql(Sql)[0][0] return Today #用于獲取今天有哪些小伙伴生日 def Today_Birthday(id): Text = [] Today_Sql = "SELECT Calendar.LUNARCALENDAR FROM `Calendar` WHERE ID='%s'" % id TodayLunar = Inquire_mysql(Today_Sql)[0][0] Today_Birthday_sql = "SELECT Tb_use.NAME FROM `Tb_use` WHERE LUNARCALENDAR='%s';" % TodayLunar Today_Birthday = Inquire_mysql(Today_Birthday_sql) if str(Today_Birthday) == '()': Text = [] else: for i in range(0,len(Today_Birthday)): Text.append(Today_Birthday[i][0]) if Text == []: Str = "" else: Str = '\t'.join(i.encode('utf-8') for i in Text) return Str #用于獲取7天內有哪些小伙伴生日 def Week_Birthday(): Text = [] for item in range(TodayID,TodayID+7): if Today_Birthday(item) == "": pass else: Text.append("%s : %s" % (Get_today(item),Today_Birthday(item))) Str = '\n'.join(i for i in Text) return Str #發郵件提醒 def Mail(Subject,text): MAIL_USER=os.environ.get('MAIL_USER') MAIL_PASSWD=os.environ.get('MAIL_PASSWD') sender = 'cctv<你的郵箱@qq.com>' receiver = ['發送到@139.com'] subject = Subject smtpserver = 'smtp.mail.qq.com' msg = MIMEMultipart('alternative') msg['Subject'] = subject html = text part = MIMEText(html,'html','utf-8') msg.attach(part) smtp = smtplib.SMTP() smtp.connect('smtp.mail.qq.com') smtp.login(MAIL_USER,MAIL_PASSWD) smtp.sendmail(sender,receiver,msg.as_string()) smtp.quit() if __name__ == '__main__': #獲取今天星期 a = time.localtime() Time = time.strftime("%w",a) #今天生日的小伙伴 tb_text = Today_Birthday(TodayID) #7天內生日的小伙伴 wb_text = Week_Birthday() #如果今天有人生日就發郵件 if tb_text == '': pass else: Mail('TodayBirthday',tb_text) #每周一查一下,如果未來7天內有人生日就發郵件 if Time == '1': if wb_text == '': pass else: Mail('WeekBirthday',wb_text) else: pass
最后,加入任務計劃
(文件名:Run.sh)
#!/bin/bash export MYSQLDB_USER=**** export MYSQLDB_PASSWD=**** export MYSQLDB_HOST=**** export MAIL_PASSWD=**** export MAIL_USER=**** #我是在虛擬環境下的,所以要用虛擬環境的路徑運行 `/home/ubuntu/class/env/bin/python /home/ubuntu/class/LunarBirthday/Run.py`
加入任務計劃,大功告成!
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。