需求:抓取某个微博博主自2022年11月以来发布的博文,并导出到Excel中。
1、导包 #
用到Hutool的HttpUtil和alibaba的EasyExcel(下文代码还用到了fastjson的JSONObject/JSONArray以及Lombok,项目里如果还没有,需要另外引入)
<!-- Hutool: supplies the HttpUtil HTTP client used to call the Weibo endpoint -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.3.4</version>
</dependency>
<!-- EasyExcel: used to write the collected posts into an .xlsx file -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.0.5</version>
</dependency>
2、查看接口地址 #
来到某个博主的个人页面,F12打开控制台,切换到网络,刷新页面,找到名为 mymblog 的这个请求(原文截图中用红色标出),它就是获取博文的接口,参数 uid 就是这个博主的id。
复制url,以及cookie
3、写代码获取博文 #
/**
 * Prints the publication time and plain text of every post returned by the
 * {@code mymblog} endpoint, newest first, until a post older than the stop
 * date (2022-11-01 00:00:00) is reached.
 *
 * <p>Pages are requested one at a time starting at page 1. The loop ends when
 * a post older than the stop date is seen, when a page contains no posts, when
 * too many requests fail in a row, or when the thread is interrupted.
 *
 * @param args unused
 * @throws ParseException if the hard-coded stop date cannot be parsed
 */
public static void main(String[] args) throws ParseException {
    String url = "https://weibo.com/ajax/statuses/mymblog?uid=1642512402&page=%s&feature=0";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    // Crawling stops as soon as a post older than this date is encountered.
    Date stopDate = dateFormat.parse("2022-11-01 00:00:00");
    // Give up after this many failed attempts in a row instead of looping forever.
    final int maxConsecutiveFailures = 3;
    int failures = 0;
    int page = 1;
    while (true) {
        boolean stop = false;
        // try-with-resources closes the response even when parsing throws.
        try (HttpResponse response = HttpUtil.createGet(String.format(url, page))
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0")
                .header("Cookie", "你的cookie,最好登录一下再复制")
                .execute()) {
            JSONObject root = JSONObject.parseObject(response.body());
            JSONObject payload = root == null ? null : root.getJSONObject("data");
            JSONArray list = payload == null ? null : payload.getJSONArray("list");
            if (list == null || list.isEmpty()) {
                // Without this check the page counter would keep growing forever
                // once the endpoint stops returning posts.
                System.err.println("No posts returned for page " + page + "; stopping.");
                break;
            }
            // Buffer the page output so that a retried page is not printed twice.
            StringBuilder pageOutput = new StringBuilder();
            for (Object o : list) {
                JSONObject data = (JSONObject) o;
                Date created = new Date(data.getString("created_at"));
                // Reached the stop date: nothing older is wanted.
                if (created.before(stopDate)) {
                    stop = true;
                    break;
                }
                // Strip HTML tags (hyperlinks, images, ...) from the post text.
                // Each item also carries reposts_count, comments_count and
                // attitudes_count, which this version does not print.
                String rawText = data.getString("text");
                String text = rawText == null ? "" : rawText.replaceAll("<[^<>]*>", "");
                pageOutput.append(dateFormat.format(created)).append(" ").append(text)
                        .append(System.lineSeparator());
            }
            System.out.print(pageOutput);
            failures = 0;
            page++;
        } catch (Exception e) {
            e.printStackTrace();
            failures++;
            if (failures >= maxConsecutiveFailures) {
                System.err.println("Giving up on page " + page + " after " + failures + " consecutive failures.");
                break;
            }
            // Otherwise fall through to the pause below and retry the same page.
        }
        if (stop) {
            break;
        }
        // Pause between requests; hitting the endpoint too fast gets the IP blocked for a few minutes.
        try {
            Thread.sleep(700);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            break;
        }
    }
}
4、写Excel #
首先创建ExcelData类
import com.alibaba.excel.annotation.ExcelProperty;
import lombok.Data;
import lombok.experimental.Accessors;
import java.util.Date;
/**
 * One row of the exported spreadsheet: a single post together with its
 * engagement counters.
 *
 * <p>Each field carries an {@code @ExcelProperty} annotation whose value is
 * the text intended as the column header (presumably rendered by EasyExcel as
 * the header row; confirm against the EasyExcel documentation).
 * {@code @Accessors(chain = true)} lets the setters be chained, which is how
 * the crawler code populates an instance.
 */
@Data
@Accessors(chain = true)
public class ExcelData {
/** Publication time of the post. */
@ExcelProperty("发布日期")
private Date date;
/** Text of the post. */
@ExcelProperty("内容")
private String content;
/** Number of likes. */
@ExcelProperty("点赞")
private Long like;
/** Number of comments. */
@ExcelProperty("评论")
private Long comment;
/** Number of reposts. */
@ExcelProperty("转发")
private Long repost;
}
然后加入写excel的代码
/**
 * Collects every post returned by the {@code mymblog} endpoint that is not
 * older than the stop date (2022-11-01 00:00:00) and writes the result to
 * {@code D:/新华社.xlsx}.
 *
 * <p>Pages are requested one at a time starting at page 1. Collection ends
 * when a post older than the stop date is seen, when a page contains no
 * posts, when too many requests fail in a row, or when the thread is
 * interrupted. Whatever has been collected by then is still written out.
 *
 * @param args unused
 * @throws ParseException if the hard-coded stop date cannot be parsed
 */
public static void main(String[] args) throws ParseException {
    String url = "https://weibo.com/ajax/statuses/mymblog?uid=1699432410&page=%s&feature=0";
    List<ExcelData> excelDataList = new ArrayList<>();
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    // Crawling stops as soon as a post older than this date is encountered.
    Date stopDate = dateFormat.parse("2022-11-01 00:00:00");
    // Give up after this many failed attempts in a row instead of looping forever.
    final int maxConsecutiveFailures = 3;
    int failures = 0;
    int page = 1;
    while (true) {
        boolean stop = false;
        // try-with-resources closes the response even when parsing throws.
        try (HttpResponse response = HttpUtil.createGet(String.format(url, page))
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0")
                .header("Cookie", "你的cookie")
                .execute()) {
            JSONObject root = JSONObject.parseObject(response.body());
            JSONObject payload = root == null ? null : root.getJSONObject("data");
            JSONArray list = payload == null ? null : payload.getJSONArray("list");
            if (list == null || list.isEmpty()) {
                // Without this check the page counter would keep growing forever
                // once the endpoint stops returning posts.
                System.err.println("No posts returned for page " + page + "; stopping.");
                break;
            }
            // Rows are buffered per page and committed only after the whole page
            // parsed, so retrying a failed page can never produce duplicate rows.
            List<ExcelData> pageRows = new ArrayList<>(list.size());
            for (Object o : list) {
                JSONObject data = (JSONObject) o;
                Date created = new Date(data.getString("created_at"));
                // Reached the stop date: nothing older is wanted.
                if (created.before(stopDate)) {
                    stop = true;
                    break;
                }
                // Strip HTML tags (hyperlinks, images, ...) from the post text.
                String rawText = data.getString("text");
                String text = rawText == null ? "" : rawText.replaceAll("<[^<>]*>", "");
                // getLong yields null for a missing counter instead of failing
                // the way Long.parseLong(null) would.
                pageRows.add(new ExcelData()
                        .setDate(created)
                        .setLike(data.getLong("attitudes_count"))
                        .setComment(data.getLong("comments_count"))
                        .setRepost(data.getLong("reposts_count"))
                        .setContent(text));
            }
            excelDataList.addAll(pageRows);
            // Progress output: one line per collected post.
            for (ExcelData row : pageRows) {
                System.out.println(dateFormat.format(row.getDate()));
            }
            failures = 0;
            page++;
        } catch (Exception e) {
            e.printStackTrace();
            failures++;
            if (failures >= maxConsecutiveFailures) {
                System.err.println("Giving up on page " + page + " after " + failures + " consecutive failures.");
                break;
            }
            // Otherwise fall through to the pause below and retry the same page.
        }
        if (stop) {
            break;
        }
        // Pause between requests to avoid being rate-limited.
        try {
            Thread.sleep(700);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            break;
        }
    }
    System.out.println("读取完毕开始写数据");
    EasyExcel.write("D:/新华社.xlsx", ExcelData.class)
            .sheet("Sheet1")
            .doWrite(excelDataList);
}
如果运行时像下面这样报错(JDK 16 及以上默认禁止通过反射访问 java.lang 包的内部成员,而 EasyExcel 依赖的 cglib 需要这种访问),在运行配置的 VM options 里添加
--add-opens java.base/java.lang=ALL-UNNAMED
具体报错内容:
com.alibaba.excel.exception.ExcelGenerateException: java.lang.ExceptionInInitializerError
at com.alibaba.excel.write.ExcelBuilderImpl.addContent(ExcelBuilderImpl.java:64)
at com.alibaba.excel.ExcelWriter.write(ExcelWriter.java:161)
at com.alibaba.excel.write.ExcelBuilderImpl.addContent(ExcelBuilderImpl.java:58)
... 29 more
Caused by: java.lang.reflect.InaccessibleObjectException: Unable to make protected final java.lang.Class java.lang.ClassLoader.defineClass(java.lang.String,byte[],int,int,java.security.ProtectionDomain) throws java.lang.ClassFormatError accessible: module java.base does not "opens java.lang" to unnamed module @5b80350b
at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:357)
at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:297)
at java.base/java.lang.reflect.Method.checkCanSetAccessible(Method.java:199)
at java.base/java.lang.reflect.Method.setAccessible(Method.java:193)
at net.sf.cglib.core.ReflectUtils$2.run(ReflectUtils.java:56)
at java.base/java.security.AccessController.doPrivileged(AccessController.java:312)
at net.sf.cglib.core.ReflectUtils.<clinit>(ReflectUtils.java:46)
... 41 more