Python如何实现搜索Google Scholar论文信息(Google,python,开发技术)

时间:2024-04-28 00:20:27 作者 : 石家庄SEO 分类 : 开发技术
  • TAG :

示例数据

Python如何实现搜索Google Scholar论文信息

示例代码

import json
import random
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from pybtex.database import BibliographyData, Entry
from pybtex.database.input import bibtex
from tqdm import tqdm


def search_doi(doi):
    """Look up a paper's metadata on Crossref by DOI.

    Args:
        doi: DOI string; it is interpolated into the Crossref ``/works/`` URL.

    Returns:
        The ``message`` member of the JSON response when the request returns
        HTTP 200, otherwise ``None`` (after printing an error notice).
    """
    url = f'https://api.crossref.org/works/{doi}'
    response = requests.get(url)
    if response.status_code != 200:
        print('Error occurred')
        return None
    return response.json()['message']


# Example usage:
# doi = 'https://dl.acm.org/doi/abs/10.1145/3394486.3403237'
# result = search_doi(doi)
# print(f"Title: {result['title'][0]}: {result['subtitle'][0]}")
# print(f"Author(s): {', '.join(author['given'] + ' ' + author['family'] for author in result['author'])}")
# print(f"Journal: {result['container-title'][0]}")
# print(f"Publication Date: {result['published-print']['date-parts'][0][0]}")


def search_cite(atid):
    """Fetch the citation strings Google Scholar offers for one article.

    Args:
        atid: Article id interpolated into the Scholar ``output=cite`` URL.

    Returns:
        dict mapping the text of each row's ``th.gs_cith`` cell to the text
        of the same row's ``div.gs_citr`` cell.

    Raises:
        AttributeError: if a ``tr`` row lacks either of those two elements
            (``find`` returns ``None`` in that case).
    """
    url = f'https://scholar.google.com/scholar?q=info:{atid}:scholar.google.com/&output=cite&scirp=8&hl=zh-CN'
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'lxml')
    result = {}
    for row in soup.find_all('tr'):
        style_name = row.find('th', class_='gs_cith').getText()
        citation = row.find('div', class_='gs_citr').getText()
        result[style_name] = citation
    return result


# Example usage:
# result = search_cite('_goqYZv1zjMJ')
# print(result)


def change_clash_node(node_name=None):
    """Send a PUT request to the local Clash API to select a proxy node.

    Args:
        node_name: Name of the node to select. When falsy, one of the
            hard-coded node names below is picked at random.

    Returns:
        None. The outcome is only reported via ``print``.
    """
    # URL and password of the Clash API.
    # NOTE(review): the '????' in the path looks like a mis-encoded character
    # from the original proxy-group name -- confirm against the Clash config.
    url = 'http://127.0.0.1:15043/proxies/????国外流量'
    # NOTE(review): hard-coded secret; move it to an environment variable or
    # a config file before sharing this script.
    password = 'ee735f4e-59c6-4d60-a2ad-aabd075badb2'
    local_node_name = [
        '香港1-IEPL-倍率1.0',
        '香港2-IEPL-倍率1.0',
        '香港3-IEPL-倍率1.0',
        '台湾1-IEPL-倍率1.0',
        '台湾2-IEPL-倍率1.0',
        '台湾3-IEPL-倍率1.0',
        '新加坡1-IEPL-倍率1.0',
        '新加坡2-IEPL-倍率1.0',
        '新加坡3-IEPL-倍率1.0',
    ]
    node_name = node_name or random.choice(local_node_name)
    print(f'当前选择节点名称:{node_name}')
    headers = {'Authorization': password}
    data = {'name': 'Rule', 'type': 'Selector', 'now': node_name}
    response = requests.put(url, headers=headers, json=data)
    if response.status_code == 200:
        print('节点已更改为:', node_name)
    else:
        print('更改节点时出错:', response.text)


# Switch to a (random) node:
# change_clash_node()


def proxy_requests(url):
    """GET ``url`` through the local SOCKS5 proxy at 127.0.0.1:7890.

    Returns:
        The ``requests`` response object.
    """
    proxies = {
        'http': 'socks5://127.0.0.1:7890',
        'https': 'socks5://127.0.0.1:7890',
    }
    return requests.get(url, proxies=proxies)


def search(title='GNN', start=0):
    """Scrape one Google Scholar result page for papers whose title matches.

    Args:
        title: Text placed after ``allintitle:`` in the query.
        start: Value of the ``start`` query parameter (result offset).

    Returns:
        A list of dicts with keys ``title``, ``year``, ``publisher`` and
        ``href`` (one per result), or ``-1`` when the response contains a
        CAPTCHA form.

    Raises:
        AttributeError: if the result container is missing and the page is
            not a CAPTCHA page, or if a result lacks one of the expected
            elements. The crawl loop below treats this as a stop signal.
    """
    url = f'https://scholar.google.com/scholar?start={start}&q=allintitle:+{title}&hl=zh-CN&as_sdt=0,5'
    resp = proxy_requests(url)
    soup = BeautifulSoup(resp.text, 'lxml')

    container = soup.find(id='gs_res_ccl_mid')
    if container is None:
        print(soup)
        # Test the raw HTML: `in` on a BeautifulSoup object only compares
        # against its direct children, so it never sees the form's id.
        if 'captcha-form' in resp.text:
            return -1
        # Raise the exception type the caller already handles instead of
        # falling through to an unbound variable.
        raise AttributeError("result container 'gs_res_ccl_mid' not found")

    papers_info = []
    for paper in container.find_all('div', class_='gs_scl'):
        heading_link = paper.find('h4', class_='gs_rt').find('a')
        # Second whitespace-separated token of the side-link text, cut at
        # the first dot.
        publisher = paper.find('div', class_='gs_or_ggsm').getText().split()[1].split('.')[0]
        href = heading_link.get('href')
        paper_title = heading_link.getText()
        detail = paper.find('div', class_='gs_ri').find('div', class_='gs_a').getText()
        # First four characters of the last comma-separated field.
        year = detail.split(',')[-1].strip()[:4]
        # Alternative (disabled): derive the fields from the MLA citation.
        # atid = paper.find('h4', class_='gs_rt').find('a').get('data-clk-atid')
        # cite_info = search_cite(atid)['MLA']
        # cite_info_filter = list(filter(lambda x: x, map(lambda x: x.strip().strip('"').strip(), cite_info.strip().split('.'))))
        # author, title, publisher, year = cite_info_filter
        papers_info.append({
            'title': paper_title,
            'year': year,
            'publisher': publisher,
            'href': href,
        })
    return papers_info


# Crawl result offsets [index_start, index_end) in steps of index_gap.
index_start = 0
index_end = 500
index_gap = 10
papers_store = []
bar = tqdm(total=index_end - index_start, desc=f'From {index_start} to {index_end}')
# for start in range(index_start, index_end, index_gap):
while index_start < index_end:
    try:
        papers_info = search(title='GNN', start=index_start)
        if papers_info == -1:
            # CAPTCHA page: switch proxy node and retry the same offset.
            print('需要验证码,更换节点后2秒内重试')
            change_clash_node()
            time.sleep(2)
            continue
        papers_store.extend(papers_info)
    except AttributeError as e:
        # Page could not be parsed; stop crawling and keep what we have.
        print(e)
        break
    index_start += index_gap
    bar.update(index_gap)
    bar.refresh()
    time.sleep(0.1)
bar.close()

# Export every collected page, not just the last one fetched.
df = pd.DataFrame(papers_store)
print(df)
df.to_csv('data.csv', index=False)
本文介绍了Python如何实现搜索Google Scholar论文信息的详细内容,希望对您有所帮助。信息来源于网络。
上一篇:Lombok中@EqualsAndHashCode注解如何使用
下一篇:

12 人围观 / 0 条评论 ↓快速评论↓

(必须)

(必须,保密)

阿狸1 阿狸2 阿狸3 阿狸4 阿狸5 阿狸6 阿狸7 阿狸8 阿狸9 阿狸10 阿狸11 阿狸12 阿狸13 阿狸14 阿狸15 阿狸16 阿狸17 阿狸18