Python如何实现搜索Google Scholar论文信息
导读:本文共2606字符,通常情况下阅读需要9分钟。同时您也可以点击右侧朗读,来听本文内容。按键盘←(左) →(右) 方向键可以翻页。
摘要: 示例数据 示例代码 import requests from bs4 import BeautifulSoup from tqdm import tqdm from pybtex.database import BibliographyData, Entry from pybtex.database.input import bibtex import pandas as pd import time... ...
音频解说
目录
(为您整理了一些要点),点击可以直达。示例数据
示例代码
import json
import random
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from pybtex.database import BibliographyData, Entry
from pybtex.database.input import bibtex
from tqdm import tqdm


def search_doi(doi):
    """Look up a paper's metadata on Crossref by DOI.

    Args:
        doi: The DOI to query; it is interpolated into the Crossref
            ``/works/{doi}`` URL as-is.

    Returns:
        The ``message`` object of the Crossref JSON response, or ``None``
        when the server does not answer with HTTP 200.
    """
    url = f'https://api.crossref.org/works/{doi}'
    response = requests.get(url)
    if response.status_code != 200:
        print('Error occurred')
        return None
    return response.json()['message']


# Example usage:
# doi = 'https://dl.acm.org/doi/abs/10.1145/3394486.3403237'
# result = search_doi(doi)
# print(f"Title: {result['title'][0]}: {result['subtitle'][0]}")
# print(f"Author(s): {', '.join(author['given'] + ' ' + author['family'] for author in result['author'])}")
# print(f"Journal: {result['container-title'][0]}")
# print(f"Publication Date: {result['published-print']['date-parts'][0][0]}")


def search_cite(atid):
    """Fetch the citation strings Google Scholar offers for one article.

    Args:
        atid: The Scholar article id placed into the ``info:{atid}`` query.

    Returns:
        A dict mapping each citation-style label (text of the ``gs_cith``
        header cell) to the citation text (text of the ``gs_citr`` div).
        Table rows missing either element are skipped.
    """
    url = f'https://scholar.google.com/scholar?q=info:{atid}:scholar.google.com/&output=cite&scirp=8&hl=zh-CN'
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'lxml')
    result = {}
    for row in soup.find_all('tr'):
        style_cell = row.find('th', class_='gs_cith')
        text_cell = row.find('div', class_='gs_citr')
        # find() returns None for rows without these cells; skip them
        # instead of failing on None.getText().
        if style_cell is None or text_cell is None:
            continue
        result[style_cell.getText()] = text_cell.getText()
    return result


# Example usage:
# result = search_cite('_goqYZv1zjMJ')
# print(result)


# Change the proxy node configuration.
def change_clash_node(node_name=None):
    """Switch the selected node of a local Clash proxy group.

    Args:
        node_name: Name of the node to select. When falsy, one entry of the
            built-in node list is chosen at random.

    Returns:
        None. The outcome is reported on stdout.
    """
    # Clash API URL and password.
    # NOTE(review): the secret is hard-coded and sent as the raw
    # Authorization header value; confirm the controller does not expect a
    # "Bearer <secret>" header, and consider moving the secret out of source.
    url = 'http://127.0.0.1:15043/proxies/????国外流量'
    password = 'ee735f4e-59c6-4d60-a2ad-aabd075badb2'
    local_node_name = [
        '香港1-IEPL-倍率1.0',
        '香港2-IEPL-倍率1.0',
        '香港3-IEPL-倍率1.0',
        '台湾1-IEPL-倍率1.0',
        '台湾2-IEPL-倍率1.0',
        '台湾3-IEPL-倍率1.0',
        '新加坡1-IEPL-倍率1.0',
        '新加坡2-IEPL-倍率1.0',
        '新加坡3-IEPL-倍率1.0',
    ]
    node_name = node_name or random.choice(local_node_name)
    print(f'当前选择节点名称:{node_name}')
    headers = {'Authorization': password}
    data = {'name': 'Rule', 'type': 'Selector', 'now': node_name}
    response = requests.put(url, headers=headers, json=data)
    # Accept 204 as well as 200: presumably the controller answers a
    # successful switch with "204 No Content" -- verify against the Clash docs.
    if response.status_code in (200, 204):
        print('节点已更改为:', node_name)
    else:
        print('更改节点时出错:', response.text)


# Example usage (switch to a random node):
# change_clash_node()


def proxy_requests(url):
    """GET ``url`` through the local SOCKS5 proxy at 127.0.0.1:7890."""
    proxies = {
        'http': 'socks5://127.0.0.1:7890',
        'https': 'socks5://127.0.0.1:7890',
    }
    return requests.get(url, proxies=proxies)


def search(title='GNN', start=0):
    """Scrape one page of Google Scholar ``allintitle:`` results.

    Args:
        title: Text placed after ``allintitle:`` in the query.
        start: Result offset passed as the ``start`` query parameter.

    Returns:
        ``-1`` when the response contains a captcha form; otherwise a list
        of dicts with the keys ``title``, ``year``, ``publisher`` and
        ``href``. The list is empty when the page has no results container.

    Raises:
        AttributeError: If a result lacks the title link or the detail line.
    """
    url = f'https://scholar.google.com/scholar?start={start}&q=allintitle:+{title}&hl=zh-CN&as_sdt=0,5'
    resp = proxy_requests(url)
    soup = BeautifulSoup(resp.text, 'lxml')
    try:
        papers_item = soup.find(id='gs_res_ccl_mid').find_all('div', class_='gs_scl')
    except AttributeError:
        # The results container is missing: dump the page for diagnosis.
        print(soup)
        # Look for the marker in the raw HTML; `in soup` would only compare
        # against the soup's direct children and never match.
        if 'captcha-form' in resp.text:
            return -1
        return []

    papers_info = []
    for paper in papers_item:
        # The side box is absent for some results, so treat the publisher
        # as optional instead of aborting the whole page.
        side_box = paper.find('div', class_='gs_or_ggsm')
        side_tokens = side_box.getText().split() if side_box is not None else []
        publisher = side_tokens[1].split('.')[0] if len(side_tokens) > 1 else ''

        link = paper.find('h4', class_='gs_rt').find('a')
        href = link.get('href')
        paper_title = link.getText()
        detail = paper.find('div', class_='gs_ri').find('div', class_='gs_a').getText()
        # The year is taken as the first four characters of the last
        # comma-separated field of the detail line.
        year = detail.split(',')[-1].strip()[:4]

        # Alternative: derive the fields from the MLA citation instead.
        # atid = paper.find('h4', class_='gs_rt').find('a').get('data-clk-atid')
        # cite_info = search_cite(atid)['MLA']
        # cite_info_filter = list(filter(lambda x: x, map(lambda x: x.strip().strip('"').strip(), cite_info.strip().split('.'))))
        # author, title, publisher, year = cite_info_filter
        papers_info.append({
            'title': paper_title,
            'year': year,
            'publisher': publisher,
            'href': href,
        })
    return papers_info


index_start = 0
index_end = 500
index_gap = 10
papers_store = []
bar = tqdm(total=index_end - index_start, desc=f'From {index_start} to {index_end}')
# for start in range(index_start, index_end, index_gap):
while index_start < index_end:
    try:
        papers_info = search(title='GNN', start=index_start)
        if papers_info == -1:
            # Captcha page: switch proxy node and retry the same offset.
            print('需要验证码,更换节点后2秒内重试')
            change_clash_node()
            time.sleep(2)
            continue
        if not papers_info:
            # An empty page means there is nothing left to fetch; stop
            # instead of requesting the remaining offsets.
            print('No results returned; stopping.')
            break
        papers_store.extend(papers_info)
    except AttributeError as e:
        print(e)
        break
    index_start += index_gap
    bar.update(index_gap)
    bar.refresh()
    time.sleep(0.1)
bar.close()

# Save every collected page, not just the last one fetched.
df = pd.DataFrame(papers_store)
print(df)
df.to_csv('data.csv', index=False)
</div> <div class="zixun-tj-product adv-bottom"></div> </div> </div> <div class="prve-next-news">
本文:
Python如何实现搜索Google Scholar论文信息的详细内容,希望对您有所帮助,信息来源于网络。