Java爬虫之如何实现B站粉丝取关人排查(java,编程语言)

时间:2024-05-01 22:30:18 作者 : 石家庄SEO 分类 : 编程语言
  • TAG :

1.爬虫开发准备

开发工具:Eclipse/IDEA

浏览器:Google Chrome

浏览器Selenium驱动:Selenium 3.5

Jar包:selenium-java、commons-httpclient(3.x)、log4j(1.x)、fastjson、javacsv、commons-lang3

// ChromeDriver驱动版本需要和Chrome浏览器版本对应

2.流程

  1. 获取Cookie(终端输入或者使用Selenium打开扫码登录)

  2. 请求https://api.bilibili.com/x/relation/followers接口

  3. 解析数据

  4. 存入csv

3.编码

package com.mm.rep;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.List;
import java.util.Scanner;
import java.util.Set;

import org.openqa.selenium.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.csvreader.CsvWriter;
import org.apache.commons.lang3.StringUtils;

/**
 * Downloads the follower list of a Bilibili account through the
 * {@code /x/relation/followers} API and stores it in a CSV file.
 *
 * <p>The login cookie is either typed on the terminal or captured from a
 * Chrome session driven by Selenium after the user logs in by QR code.
 *
 * <p>Usage: {@code Main [vmid] [csvPath]} - both arguments are optional and
 * default to the values that were previously hard-coded.
 */
public class Main {

    private static final Logger logger = LogManager.getLogger(Main.class);

    /** Browser session; stays {@code null} when the cookie is typed on the terminal. */
    private static WebDriver driver = null;
    private static Set<Cookie> bcookies = null;

    private final static String BLOGINURL = "https://passport.bilibili.com/login";
    private final static String BMAINPAGE = "https://www.bilibili.com/";

    private static final String FOLLOWERS_API = "https://api.bilibili.com/x/relation/followers";
    private static final String DEFAULT_VMID = "309103931";
    private static final String DEFAULT_CSV_PATH = "C:\\Users\\computer\\Desktop\\aaa.csv";
    private static final String DEFAULT_CHROME_DRIVER = "H:/chromedriver/chromedriver.exe";
    private static final int PAGE_SIZE = 20;
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /** Configures log4j with its default console appender. */
    Main() {
        BasicConfigurator.configure();
    }

    /**
     * Builds a GET request carrying the request headers that never change.
     *
     * <p>A fresh method object is created per request so that headers do not
     * accumulate across pages and each connection can be released on its own.
     * The HTTP/2 pseudo-headers ({@code :authority}, {@code :method},
     * {@code :scheme}) are deliberately not sent: they are not valid header
     * names for the HTTP/1.1 client used here.
     */
    private static GetMethod newRequest() {
        GetMethod request = new GetMethod();
        request.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF-8");
        request.setRequestHeader("accept", "*/*");
        request.setRequestHeader("accept-language", "zh-CN,zh;q=0.9");
        request.setRequestHeader("sec-fetch-dest", "script");
        request.setRequestHeader("sec-fetch-mode", "no-cors");
        request.setRequestHeader("sec-fetch-site", "same-site");
        request.setRequestHeader("user-agent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                        + "Chrome/85.0.4183.83 Safari/537.36");
        return request;
    }

    /**
     * Obtains the login cookie.
     *
     * <p>The user is first asked to paste a cookie on the terminal. If the
     * line is blank, Chrome is opened on the login page and this method polls
     * until the browser reaches the Bilibili main page, then reads the cookies
     * of that session.
     *
     * @return the value to send in the {@code cookie} request header
     * @throws InterruptedException if interrupted while waiting for the login
     */
    public static String getCookie() throws InterruptedException {
        // The Scanner is intentionally not closed: closing it would close System.in.
        Scanner ip = new Scanner(System.in);
        logger.info("请输入Cookie,如果没有请按回车:");
        String scCookie = ip.nextLine();
        if (StringUtils.isNotBlank(scCookie)) {
            return scCookie.trim();
        }

        logger.info("开始扫码登录");
        // Respect a driver location supplied with -Dwebdriver.chrome.driver=...
        if (System.getProperty("webdriver.chrome.driver") == null) {
            System.setProperty("webdriver.chrome.driver", DEFAULT_CHROME_DRIVER);
        }
        ChromeOptions options = new ChromeOptions();
        driver = new ChromeDriver(options);
        driver.get(BLOGINURL);

        // Wait until the login redirects the browser to the main page.
        while (!BMAINPAGE.equals(driver.getCurrentUrl())) {
            Thread.sleep(100);
        }
        logger.info("扫码登录成功");

        bcookies = driver.manage().getCookies();
        // Cookie.toString() also contains expires/path/domain attributes, which
        // do not belong in a request header, so only name=value pairs are joined.
        StringBuilder header = new StringBuilder();
        for (Cookie c : bcookies) {
            if (header.length() > 0) {
                header.append("; ");
            }
            header.append(c.getName()).append('=').append(c.getValue());
        }
        return header.toString();
    }

    /**
     * Fetches one page of followers.
     *
     * @param cookie value of the {@code cookie} request header
     * @param vmid   id of the account whose followers are listed
     * @param pn     page number, starting at 1
     * @param ps     page size
     * @return the followers on that page; an empty list when the response
     *         contains no {@code data.list} (last page passed or request rejected)
     * @throws HttpException on a protocol error
     * @throws IOException   on a transport error or a non-200 HTTP status
     */
    public static List<JSONObject> getFanS(String cookie, String vmid, int pn, int ps)
            throws InterruptedException, HttpException, IOException {
        HttpClient client = new HttpClient();
        String url = FOLLOWERS_API + "?vmid=" + vmid + "&pn=" + pn + "&ps=" + ps + "&order=desc&jsonp=jsonp";

        GetMethod request = newRequest();
        try {
            request.setURI(new URI(url, true));
            request.setRequestHeader("cookie", cookie);

            int status = client.executeMethod(request);
            if (status != 200) {
                throw new IOException("Unexpected HTTP status " + status + " for " + url);
            }

            byte[] raw = request.getResponseBody();
            if (raw == null) {
                return Collections.emptyList();
            }
            JSONObject root = JSONObject.parseObject(new String(raw, UTF8));
            JSONObject data = root == null ? null : root.getJSONObject("data");
            if (data == null) {
                // Error responses carry no "data" object.
                logger.warn("No data in response for page " + pn + ": "
                        + (root == null ? "empty body" : root.getString("message")));
                return Collections.emptyList();
            }
            JSONArray list = data.getJSONArray("list");
            if (list == null) {
                return Collections.emptyList();
            }
            return JSON.parseArray(list.toJSONString(), JSONObject.class);
        } finally {
            request.releaseConnection();
        }
    }

    /**
     * Entry point: logs in, pages through the follower list and writes one CSV
     * row per follower.
     *
     * @param args optional {@code [vmid] [csvPath]}
     */
    public static void main(String[] args) throws InterruptedException, HttpException {
        logger.info("程序开始...");
        new Main();

        String vmid = args != null && args.length > 0 ? args[0] : DEFAULT_VMID;
        String csvPath = args != null && args.length > 1 ? args[1] : DEFAULT_CSV_PATH;

        String cookie = Main.getCookie();
        CsvWriter csvWriter = new CsvWriter(csvPath, ',', UTF8);
        String[] csvHeaders = {"mid", "粉丝名字", "粉丝签名", "粉丝头像"};
        try {
            csvWriter.writeRecord(csvHeaders);
            int pn = 1;
            while (true) {
                List<JSONObject> page = Main.getFanS(cookie, vmid, pn, PAGE_SIZE);
                // An empty page marks the end of the list (or a rejected request).
                if (page == null || page.isEmpty()) {
                    break;
                }
                for (JSONObject f : page) {
                    if (f == null) {
                        continue;
                    }
                    String[] row = {f.getString("mid"), f.getString("uname"), f.getString("sign"),
                            f.getString("face")};
                    logger.info(StringUtils.join(row, ","));
                    csvWriter.writeRecord(row);
                }
                pn++;
                // Small pause between pages to reduce the chance of being throttled.
                Thread.sleep(100);
            }
        } catch (IOException e) {
            logger.error("Failed to fetch or store the follower list", e);
        } finally {
            csvWriter.close();
            // No browser exists when the cookie was typed on the terminal.
            if (driver != null) {
                driver.quit();
            }
        }
        logger.info("程序结束");
    }
}

4.缺点

容易被拦截,且最多只能获取不到1000个粉丝

Java爬虫之如何实现B站粉丝取关人排查

 </div> <div class="zixun-tj-product adv-bottom"></div> </div> </div> <div class="prve-next-news">
本文:Java爬虫之如何实现B站粉丝取关人排查的详细内容,希望对您有所帮助,信息来源于网络。
上一篇:python面向对象中函数有什么用下一篇:

4 人围观 / 0 条评论 ↓快速评论↓

(必须)

(必须,保密)

阿狸1 阿狸2 阿狸3 阿狸4 阿狸5 阿狸6 阿狸7 阿狸8 阿狸9 阿狸10 阿狸11 阿狸12 阿狸13 阿狸14 阿狸15 阿狸16 阿狸17 阿狸18