效果图:
 

源码如下:
首先是Python对cvpr论文的爬取部分:爬取的网址为 http://openaccess.thecvf.com
 
import pymysql import re import requests # 连接数据库函数 def insertCvpr(value): try: db = pymysql.connect("localhost", "root", "root", "jiaoli") print("数据库连接成功!") cur = db.cursor() sql = ‘INSERT INTO cvpr(title,ab,hotword,pdf) VALUE (%s,%s,%s,%s)‘ cur.execute(sql, value) db.commit() print("增加数据成功!") except pymysql.Error as e: print("增加数据失败: " + str(e)) db.rollback() db.close() #开头 url = "http://openaccess.thecvf.com/ICCV2019.py" headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36"} res = requests.get(url,headers=headers) res.encoding = "utf-8" # 先爬取每个论文的网址 web = re.findall("""<dt class="ptitle"><br><a href="(.*?)">.*?</a></dt>""", res.text, re.S) for each in web: try: each = "http://openaccess.thecvf.com/" + each print(each) res = requests.get(each, headers=headers, timeout=(3, 7)) res.encoding = "utf-8" # 在各各论文网站中爬取详细信息 title = re.findall("""<div id="papertitle">(.*?)</div>""", res.text, re.S) ab = re.findall("""<div id="abstract" >(.*?)</div>""", res.text, re.S) pdf = re.findall("""\[<a href="\.\./\.\./(.*?)">pdf</a>\]""", res.text, re.S) if (len(title) > 0): title = title[0].replace("\n", "") ab = ab[0].replace("\n", "") pdf = "http://openaccess.thecvf.com/" + pdf[0] print(title) value = (title, ab, "", pdf) insertCvpr(value) except: print("闪过")
然后对爬取的文章进行题目和摘要的关键词分析,分析出词频最高的有效词汇:
 
import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpSession; //2.将删除改成类名 /** * Servlet implementation class index */ @WebServlet("/input") public class input extends HttpServlet{ private static final long serialVersionUID = 1L; /** * @see HttpServlet#HttpServlet() */ public input() { super(); // TODO Auto-generated constructor stub } /** * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response) */ protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { request.setCharacterEncoding("UTF-8"); response.setContentType("text/html;charset=UTF-8"); //声明缓冲区 HttpSession session = request.getSession(); String url = "jdbc:mysql://localhost:3306/cvpr?&useSSL=false&serverTimezone=UTC&useUnicode=yes&characterEncoding=utf8"; Connection conn = null; PreparedStatement ps = null; try { Class.forName("com.mysql.cj.jdbc.Driver"); conn = DriverManager.getConnection(url, "root", "root"); } catch (ClassNotFoundException e) { response.getWriter().print("加载驱动失败"); } catch (SQLException e) { response.getWriter().print("连接数据库失败"); } StringBuffer buffer2 = new StringBuffer(); try { Statement stmt = conn.createStatement(); //1.改sql语句 ResultSet rs = stmt.executeQuery("select * from cvpr"); while (rs.next()) { String title=new String(rs.getString("title")); String ab=new String(rs.getString("ab")); buffer2.append(title); buffer2.append(ab); } }catch (SQLException e) { response.getWriter().print("查找失败"); } String file = buffer2.toString(); String[] a=file.split("[^a-zA-Z]+"); int n=a.length; int kind=0,zs=0; Object[][] b=new Object[n][2]; for(;zs<n;zs++){ int k=0; for(int i=0;i<kind;i++){ if(((String) b[i][0]).equalsIgnoreCase(a[zs])){ b[i][1]=(int)b[i][1]+1; k=1; break; } } if(k==0){ b[kind][0]=a[zs]; b[kind][1]=1; kind++; } } int max=0; int p=0,q=0; String m; String[] c=new String[1000]; int[] d=new int[1000]; for(int i=0;i<1000;i++) { c[i]=""; d[i]=0; } for(int i=0;i<kind;i++){ for(int j=0;j<kind;j++){ if((int)b[j][1]>(int)b[max][1]) {if(((String) b[j][0]).length()<4) {j=j++; continue;} if(((String) b[j][0]).equals("With")) {j++; continue;} if(((String) b[j][0]).equals("with")) {j++; continue;} if(((String) b[j][0]).equals("that")) {j++; continue;} if(((String) b[j][0]).equals("this")) {j++; continue;} if(((String) b[j][0]).equals("from")) {j++; continue;} if(((String) b[j][0]).equals("which")) {j++; continue;} else max=j; } } System.out.println(b[max][0]+"出现次数为:"+b[max][1]); if(i<30) {c[i]=(String) b[max][0]; d[i]= (int) b[max][1]; b[max][1]=0; } else b[max][1]=0; } session.setAttribute("c",c); session.setAttribute("d",d); request.getRequestDispatcher( "reciyun.jsp").forward(request,response); } }
然后是展示界面:
展示界面需要导入echarts.min.js echarts-wordcloud.min.js两个包
 
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Document</title>
</head>
<body>
<%String num[] = (String[])session.getAttribute("c");%>
<%int num2[] = (int[])session.getAttribute("d");%>
<form action="tiaozhuan" method="post">
<div id = "main" style="width: 1200px;height: 800px;"></div>
</form>
<script type="text/javascript" src = "js/echarts.min.js"></script>
<script type="text/javascript" src = "js/echarts-wordcloud.min.js"></script>
<script type="text/javascript">
         
         var worldCloudcharts=echarts.init(document.getElementById(‘main‘));
         var worldCloudoption = {
                 title: {
                     text: ‘CVPR热词‘,
                     x: ‘center‘,
                     textStyle: {
                         fontSize: 23,
                         color:‘#FFFFFF‘
                     }
 
                 },
                 tooltip: {
                     show: true
                 },
                 series: [{
                     name: ‘CVPR热词‘,
                     type: ‘wordCloud‘,
                     sizeRange: [20, 130],
                     rotationRange: [-45, 90],
                     textPadding: 0,
                     autoSize: {
                         enable: true,
                         minSize: 10
                     },
                     textStyle: {
                         normal: {
                             color: function() {
                                 return ‘rgb(‘ + [
                                     Math.round(Math.random() * 160),
                                     Math.round(Math.random() * 160),
                                     Math.round(Math.random() * 160)
                                 ].join(‘,‘) + ‘)‘;
                             }
                         },
                         emphasis: {
                             shadowBlur: 10,
                             shadowColor: ‘#333‘
                         }
                     },
                     data: [{
                         name: "Jayfee",
                         value: 666
                     }, {
                         name: "Nancy",
                         value: 520
                     }]
                 }]
             };
 
             var JosnList = [];
 
             JosnList.push(
                     <%for(int i=0;i<29;i++)
                     {
                     %>
                     {name: "<%=num[i]%>",value: <%=num2[i]%>,url:‘tiaozhuan?title=<%=num[i]%>‘},
                     <%
                     }
                     %>
                     {name: "<%=num[99]%>",value: <%=num2[29]%>,url:‘tiaozhuan?title=<%=num[29]%>‘}
                     );
         worldCloudoption.series[0].data = JosnList;
         worldCloudcharts.setOption(worldCloudoption);
         worldCloudcharts.on("click",function(e){
             console.log(e);
             window.open(e.data.url);
         });
     </script>
</body>
</html>
最后对关键词在数据库进行模糊查询把相关文章信息返回jsp中展示:
 
import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpSession; import com.cvpr.lun.user; //2.将删除改成类名 /** * Servlet implementation class index */ @WebServlet("/tiaozhuan") public class tiaozhuan extends HttpServlet{ private static final long serialVersionUID = 1L; /** * @see HttpServlet#HttpServlet() */ public tiaozhuan() { super(); // TODO Auto-generated constructor stub } /** * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response) */ protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { request.setCharacterEncoding("UTF-8"); response.setContentType("text/html;charset=UTF-8"); //声明list ArrayList<user> list = new ArrayList(); //声明缓冲区 HttpSession session = request.getSession(); String url = "jdbc:mysql://localhost:3306/cvpr?&useSSL=false&serverTimezone=UTC&useUnicode=yes&characterEncoding=utf8"; Connection conn = null; PreparedStatement ps = null; try { Class.forName("com.mysql.cj.jdbc.Driver"); conn = DriverManager.getConnection(url, "root", "root"); } catch (ClassNotFoundException e) { response.getWriter().print("加载驱动失败"); } catch (SQLException e) { response.getWriter().print("连接数据库失败"); } String name=request.getParameter("title"); System.out.printf(name); try { Statement stmt = conn.createStatement(); //1.改sql语句 ResultSet rs = stmt.executeQuery("select * from cvpr where title like ‘%"+name+"%‘ "); while (rs.next()) { String title=new String(rs.getString("title")); String ab=new String(rs.getString("ab")); String pdf=new String(rs.getString("pdf")); user use2=new user(title,ab,pdf); list.add(use2); System.out.printf(title); } }catch (SQLException e) { response.getWriter().print("查找失败"); } request.setAttribute("list",list); request.getRequestDispatcher( "tiaozhuan.jsp").forward(request,response); //*************************************** } protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { // TODO Auto-generated method stub doGet(request, response); } }
 
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Document</title>
</head>
<body>
    <c:forEach items="${list}" var="use2" >    
        书名:${use2.title}<br/>
        摘要:${use2.ab}<br/>
        连接:<a href =${use2.pdf} >查看原文</a><br/>
    </c:forEach>
</body>
</html>
原文:https://www.cnblogs.com/dwx8845/p/12714894.html