Crawl SGU
unknown
java
4 years ago
1.4 kB
6
Indexable
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
/**
 * Fetches one page per id for a fixed range of ids and copies selected lines
 * of each response into a single local file, {@code crawl.html}.
 *
 * <p>For every id the string {@code "MSV: <id>"} is written first (presumably
 * "MSV" stands for the student id — confirm), followed by:
 * <ul>
 *   <li>every line from the one containing the {@code gvHocPhi} element id up to
 *       and including the next line that contains {@code </table>}; and</li>
 *   <li>any line containing the {@code lblTenSinhVien} element id.</li>
 * </ul>
 *
 * <p>All streams are opened with try-with-resources so the output is flushed and
 * every connection stream is released, even when a request fails part-way.
 */
public class Main {

    /** First id requested (inclusive). */
    private static final long FIRST_ID = 3119410001L;

    /** Id at which the crawl stops (exclusive). */
    private static final long END_ID_EXCLUSIVE = 3119410509L;

    /** Page address; the id is appended as the value of the trailing {@code id} parameter. */
    private static final String PAGE_URL_PREFIX =
            "http://thongtindaotao.sgu.edu.vn/Default.aspx?page=xemhocphi&id=";

    /** Element id whose line starts the table section that is copied. */
    private static final String TABLE_MARKER = "ctl00_ContentPlaceHolder1_ctl00_gvHocPhi";

    /** Element id whose line is always copied, inside or outside the table section. */
    private static final String NAME_MARKER =
            "ctl00_ContentPlaceHolder1_ctl00_ucThongTinSV_lblTenSinhVien";

    /** Text that ends the copied table section. */
    private static final String TABLE_END = "</table>";

    /** File the extracted lines are written to, relative to the working directory. */
    private static final String OUTPUT_FILE = "crawl.html";

    /**
     * Charset used for the output file and for decoding responses.
     * NOTE(review): assumes the server serves UTF-8 — confirm against the response headers.
     */
    private static final String CHARSET = "utf-8";

    /**
     * Runs the crawl over {@code [FIRST_ID, END_ID_EXCLUSIVE)}.
     *
     * <p>Any failure stops the crawl and prints the stack trace; whatever was
     * written before the failure is still flushed to the output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Declared as two resources so the file stream is closed even if the
        // writer's constructor rejects the charset name.
        try (FileOutputStream fileStream = new FileOutputStream(OUTPUT_FILE);
             Writer out = new OutputStreamWriter(fileStream, CHARSET)) {
            for (long id = FIRST_ID; id < END_ID_EXCLUSIVE; id++) {
                crawlStudent(id, out);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the page for one id and appends its header plus matching lines to {@code out}.
     *
     * @param id  id appended to the page address
     * @param out destination for the extracted text
     * @throws IOException if the page cannot be fetched or the output cannot be written
     */
    private static void crawlStudent(long id, Writer out) throws IOException {
        URLConnection conn = new URL(PAGE_URL_PREFIX + id).openConnection();
        // The stream is opened before the header is written, so a failed request
        // leaves no dangling "MSV: ..." entry in the output.
        try (BufferedReader in =
                     new BufferedReader(new InputStreamReader(conn.getInputStream(), CHARSET))) {
            out.write("MSV: " + id);
            copyMatchingLines(in, out);
        }
    }

    /**
     * Copies the table section and the name line from {@code in} to {@code out},
     * each followed by a newline.
     *
     * @param in  page content, read line by line until exhausted
     * @param out destination for the matching lines
     * @throws IOException if reading or writing fails
     */
    private static void copyMatchingLines(BufferedReader in, Writer out) throws IOException {
        boolean inTable = false;
        String line;
        while ((line = in.readLine()) != null) {
            if (line.contains(TABLE_MARKER)) {
                inTable = true;
            }
            if (inTable && line.contains(TABLE_END)) {
                // Emit the closing line here, then leave the section so the
                // check below does not emit it a second time.
                out.write(line + "\n");
                inTable = false;
            }
            if (inTable || line.contains(NAME_MARKER)) {
                out.write(line + "\n");
            }
        }
    }
}
Editor is loading...