package com.zjazn; import com.sun.org.apache.bcel.internal.generic.RETURN; import com.sun.xml.internal.ws.api.server.InstanceResolver; import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import sun.net.www.http.HttpClient; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.List; public class Data { public static void main(String[] args) { String html = getData(); Document htmledThisDocument = Jsoup.parse(html); List<MyData> myData=new ArrayList<MyData>(); Elements courses = htmledThisDocument.select(".learn-path-container>div"); for (Element course:courses){ String courseName = course.select("a>div").first().text(); String courseNum = course.select("a>div").last().text(); if(courseNum.indexOf("门")>-1){ int num = Integer.parseInt(courseNum.substring(0, courseNum.indexOf("门"))); String imgPath = course.select("a>img").attr("src"); String fuffix = imgPath.substring(imgPath.lastIndexOf(".")); MyData myData6 = new MyData(); myData6.setName(courseName); myData6.setImgPath(imgPath); myData6.setNum(num); myData.add(myData6); downloadFile(imgPath,"E://myimg",courseName+fuffix); } } System.out.println(myData.toString()); } public static String getData(){ CloseableHttpClient httpClient = HttpClients.createDefault(); HttpGet httpGet = new HttpGet("https://www.lanqiao.cn/paths/"); CloseableHttpResponse response=null; HttpEntity entity=null; String html=null; try { response = 
httpClient.execute(httpGet);//发送请求 if(response.getStatusLine().getStatusCode() ==200){ entity = response.getEntity();//获取html html= EntityUtils.toString(entity,"UTF-8");//用指定编码解析html } return html; } catch (IOException e) { e.printStackTrace(); } return null; } public static void downloadFile(String urlStr,String directory,String fileName){ FileOutputStream out =null; InputStream in=null; try { URL url=new URL(urlStr); URLConnection urlConnection = url.openConnection(); in=urlConnection.getInputStream(); byte[] buf=new byte[1000]; File dir = new File(directory); if(!dir.exists() ){ dir.mkdir(); } out=new FileOutputStream(directory+"\\"+fileName); int len=-1; while ((len=in.read(buf))!=-1){ out.write(buf,0,len); } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); }finally { try { if(in != null){ in.close(); } if (out !=null){ out.close(); } } catch (IOException e) { e.printStackTrace(); } } } }
// MyData.java -- Java allows only one package declaration and one public top-level class per
// file, so this class has to live in its own source file next to Data.java.
package com.zjazn;

import lombok.Data;

/**
 * One scraped learning-path record, filled in by {@code com.zjazn.Data#main}.
 *
 * <p>Lombok's {@code @Data} generates the getters, setters, {@code equals}/{@code hashCode}
 * and {@code toString}; {@code Data.main} relies on {@code setName}, {@code setImgPath},
 * {@code setNum} and on {@code toString} when printing the result list.
 */
@Data
public class MyData {
    /** Title text of the entry (text of the first {@code a>div} element of the card). */
    private String name;

    /** Cover image location taken from the card's {@code img} {@code src} attribute. */
    private String imgPath;

    /** Number parsed from the text preceding "门" in the card's last {@code a>div} element. */
    private Integer num;
}
// Original article (原文): https://www.cnblogs.com/zjazn/p/14188395.html