Java - 크롤링, crawling
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import javax.net.ssl.HttpsURLConnection;
/**
 * Minimal crawling example: downloads one web page over HTTPS and prints its
 * text to standard output.
 */
public class CrawApp {

    /**
     * Fetches the page at the hard-coded URL, decodes the response body as
     * EUC-KR, and prints the whole document to {@code System.out}.
     *
     * <p>Any failure (malformed URL, I/O error, unsupported charset, or a
     * non-HTTPS connection failing the cast) is reported by printing the stack
     * trace; nothing is propagated to the caller.
     */
    public static void crawling() {
        // Placeholder address; replace with the page to crawl.
        String url = "https://웹페이지주소";
        try {
            URL obj = new URL(url);
            HttpsURLConnection con = (HttpsURLConnection) obj.openConnection();
            // The connection is available from here on. The response arrives as
            // raw bytes, so wrap it in a reader that decodes EUC-KR text.
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), "EUC-KR"))) {
                System.out.println(readAll(in));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads every remaining line from {@code reader} and returns them joined
     * with {@code '\n'}.
     *
     * <p>{@link BufferedReader#readLine()} strips line terminators, so a
     * separator is re-inserted between lines; otherwise tokens that sit on
     * adjacent lines of the document would be fused together. Package-private
     * so the read loop can be exercised without a network connection.
     *
     * @param reader source of text; it is not closed by this method
     * @return the text with lines separated by {@code '\n'} and no trailing
     *         separator; an empty string when there is nothing to read
     * @throws java.io.IOException if reading from {@code reader} fails
     */
    static String readAll(BufferedReader reader) throws java.io.IOException {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        String line;
        while ((line = reader.readLine()) != null) {
            // A flag rather than sb.length() keeps leading blank lines intact.
            if (!first) {
                sb.append('\n');
            }
            sb.append(line);
            first = false;
        }
        return sb.toString();
    }

    /**
     * Program entry point: runs the crawl once.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        crawling();
    }
}
크롤링 - 크롬에서 아래 명령어로 파싱 가능
document.querySelector('#articleTitle');
<h3 id="articleTitle" class="tts_head">굴착기 이용 GP철거 오늘 시작…김용우 육군총장 현장방문</h3>
var article = document.querySelector('#articleTitle');
article.textContent
"굴착기 이용 GP철거 오늘 시작…김용우 육군총장 현장방문"
var txt = document.querySelectorAll('.lo_txt');
txt[0].textContent
"273"