java一些小的技术分享(java新技术分享)
282
2022-08-30
Python: 两秒爬取最新大学排名(python培训)
"""Export a university ranking JSON feed to an Excel workbook.

The script downloads a JSON document whose ``data`` entry is a list of
records, strips the HTML markup from each field, and writes one row per
record to ``QSUniversities.xlsx``.
"""
import traceback

import requests
import xlsxwriter
from pyquery import PyQuery as pq

# NOTE(review): the endpoint URL was lost when this script was extracted
# (the original ``requests.get("...")`` literal is empty/unterminated).
# Fill in the real JSON endpoint here or pass ``url=`` to ``record``.
DATA_URL = ""

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# (column header, record keys) in sheet order.  When a column lists several
# keys, their cleaned texts are concatenated into a single cell.
COLUMNS = (
    ('Rank', ("overall_rank_dis",)),
    ('University', ("uni",)),
    ('Location', ("city", "location")),
    ('Overall Score', ("overall",)),
    ('Academic Reputation', ("ind_76",)),
    ('Employer Reputation', ("ind_77",)),
    ('Citations per Faculty', ("ind_73",)),
    ('Faculty Student Ratio', ("ind_36",)),
    ('International Students Ratio', ("ind_14",)),
    ('International Faculty Ratio', ("ind_18",)),
    ('International Research Network', ("ind_15",)),
    ('Employment Outcomes', ("ind_3819456",)),
)


def __initWorkBook():
    """Create the output workbook ``QSUniversities.xlsx`` and return it."""
    workBook = xlsxwriter.Workbook('QSUniversities.xlsx')
    print("[info]: workBook initialized")
    return workBook


def __initSheet(workBook):
    """Add a worksheet to *workBook*, write the header row, and return it."""
    Sheet = workBook.add_worksheet()
    for column, (title, _keys) in enumerate(COLUMNS):
        Sheet.write(0, column, title)
    print("[info]: sheet initialized")
    return Sheet


def __closeWorkBook(workBook):
    """Close *workBook*; the workbook was opened by ``__initWorkBook``."""
    workBook.close()
    print("[info]: WorkBook closed")


def _plain_text(value):
    """Return *value* with HTML markup removed, or ``""`` when it is blank.

    ``None`` and empty/whitespace-only values are mapped to an empty string
    so a record with a missing field does not abort the whole export.
    """
    if value is None:
        return ""
    raw = str(value)
    if not raw.strip():
        return ""
    return pq(raw).text()


def record(url=None):
    """Fetch the ranking feed and write it to the Excel workbook.

    Args:
        url: JSON endpoint to download.  Defaults to ``DATA_URL``.

    Raises:
        ValueError: if no URL is configured.

    Errors raised while downloading or writing rows are printed with a
    traceback (best-effort export); the workbook is always closed so the
    rows written so far are kept.
    """
    url = url or DATA_URL
    if not url:
        # Fail before creating the workbook so no empty file is left behind.
        raise ValueError("no data URL configured: set DATA_URL or pass url=")

    workBook = __initWorkBook()
    try:
        sheet = __initSheet(workBook=workBook)
        ret = requests.get(url, timeout=REQUEST_TIMEOUT)
        ret.raise_for_status()
        # A payload without "data" is treated as an empty result set.
        data = ret.json().get("data") or []
        for curLine, item in enumerate(data, start=1):
            for column, (_title, keys) in enumerate(COLUMNS):
                cell = "".join(_plain_text(item.get(key)) for key in keys)
                sheet.write(curLine, column, cell)
            print(f"[info]: {curLine} processed...")
    except Exception:
        traceback.print_exc()
    finally:
        __closeWorkBook(workBook=workBook)


if __name__ == "__main__":
    record()
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
发表评论
暂时没有评论,来抢沙发吧~