Crawl SGU
unknown
java
4 years ago
1.4 kB
3
Indexable
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

/**
 * Fetches one page per numeric ID from the SGU "xemhocphi" endpoint and appends selected
 * lines of each response to {@code crawl.html}.
 *
 * <p>For every ID in {@code [FIRST_ID, END_ID)} the program writes {@code "MSV: <id>"}
 * (without a trailing newline), followed by:
 * <ul>
 *   <li>every response line from the one containing {@link #FEE_TABLE_MARKER} up to and
 *       including the next line containing {@code </table>}; and</li>
 *   <li>every response line containing {@link #STUDENT_NAME_MARKER}.</li>
 * </ul>
 * The marker names suggest a tuition-fee grid and a student-name label, but the page
 * layout itself is not visible from this file.
 */
public class Main {

    /** Destination file; it is truncated and rewritten on every run. */
    private static final String OUTPUT_FILE = "crawl.html";

    /** Request URL prefix; the numeric ID is appended to it. */
    private static final String BASE_URL =
            "http://thongtindaotao.sgu.edu.vn/Default.aspx?page=xemhocphi&id=";

    /** Substring that starts the copied region of a response. */
    private static final String FEE_TABLE_MARKER = "ctl00_ContentPlaceHolder1_ctl00_gvHocPhi";

    /** Substring that ends the copied region (the terminating line is copied too). */
    private static final String TABLE_END_MARKER = "</table>";

    /** Substring of lines that are always copied, inside or outside the region. */
    private static final String STUDENT_NAME_MARKER =
            "ctl00_ContentPlaceHolder1_ctl00_ucThongTinSV_lblTenSinhVien";

    /** First ID requested (inclusive). */
    private static final long FIRST_ID = 3119410001L;

    /** Upper bound of the requested IDs (exclusive). */
    private static final long END_ID = 3119410509L;

    /**
     * Runs the crawl over the whole ID range.
     *
     * <p>The first I/O failure (network or file) stops the crawl and prints its stack
     * trace; whatever was written up to that point is still flushed to the output file
     * because the writer is closed by try-with-resources.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Closing the writer is what flushes the encoder's buffer; without it the tail
        // of the output never reaches the file.
        try (Writer out = new OutputStreamWriter(
                new FileOutputStream(OUTPUT_FILE), StandardCharsets.UTF_8)) {
            for (long id = FIRST_ID; id < END_ID; id++) {
                crawlOne(id, out);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Downloads the page for {@code id} and appends its header and selected lines to {@code out}. */
    private static void crawlOne(long id, Writer out) throws IOException {
        URL url = new URL(BASE_URL + id);
        URLConnection conn = url.openConnection();
        // NOTE(review): decoding as UTF-8 to match the output encoding instead of relying
        // on the platform default; this assumes the server sends UTF-8 - confirm.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            // No newline after the header: the first copied line continues on the same line.
            out.write("MSV: " + id);
            copySelectedLines(in, out);
        }
    }

    /** Copies the marked table region and every student-name line from {@code in} to {@code out}. */
    private static void copySelectedLines(BufferedReader in, Writer out) throws IOException {
        boolean insideTable = false;
        String line;
        while ((line = in.readLine()) != null) {
            if (line.contains(FEE_TABLE_MARKER)) {
                insideTable = true;
            }
            if (insideTable && line.contains(TABLE_END_MARKER)) {
                // Emit the closing line here because the region flag is cleared before
                // the general check below.
                out.write(line + "\n");
                insideTable = false;
            }
            if (insideTable || line.contains(STUDENT_NAME_MARKER)) {
                out.write(line + "\n");
            }
        }
    }
}
Editor is loading...