java抓取邮箱号码 java抓取网页或文件中的邮箱号码
java大渣渣 人气:0想了解java抓取网页或文件中的邮箱号码的相关内容吗,java大渣渣在本文为您仔细讲解java抓取邮箱号码的相关知识和一些Code实例,欢迎阅读和指正,我们先划重点:java抓取网页中邮箱号码,java抓取文件中邮箱号码,java抓取邮箱号码,下面大家一起来学习吧。
java抓取文件中邮箱号码的具体代码
package reg; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class TestEmail { public static void main(String[] args) { // TODO Auto-generated method stub BufferedReader br=null; try { br=new BufferedReader(new FileReader("D:/1.htm")); String str=null; StringBuilder sb=new StringBuilder(); while((str=br.readLine())!=null){ sb.append(str); } List es=getEmail(sb.toString()); for(String e:es){ System.out.println(e); } } catch (FileNotFoundException e) { // TODO: handle exception e.printStackTrace(); }catch (IOException e) { // TODO: handle exception e.printStackTrace(); }finally { try { if(br!=null) br.close(); } catch (IOException e) { // TODO: handle exception e.printStackTrace(); } } } public static List getEmail(String str){ List es=new ArrayList(); Pattern p=Pattern.compile("[\\w\\.-]*\\w+@[\\w\\.-]*\\w+\\.\\w{2,5}"); // Pattern p=Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+"); Matcher m=p.matcher(str); while(m.find()){ es.add(m.group()); } return es; } }
java抓取网页中邮箱号码的具体代码
package reg; import java.io.BufferedReader; import java.io.InputStreamReader; import java.util.regex.Matcher; import java.util.regex.Pattern; public class Testemail01 { public static String getWebCon(String domain) { System.out.println("开始抓取邮件地址..("+domain+")"); StringBuffer sb=new StringBuffer(); try { java.net.URL url=new java.net.URL(domain); BufferedReader in=new BufferedReader(new InputStreamReader(url.openStream())); String line; while((line=in.readLine())!=null) { parse(line); } in.close(); } catch(Exception e) { sb.append(e.toString()); System.err.println(e); } return sb.toString(); } public static void main(String[] args) { String s=Testemail01.getWebCon("http://tieba.baidu.com/p/2366935784"); } private static void parse(String line) { Pattern p=Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");//邮箱的正则表达式 Matcher m=p.matcher(line); while(m.find()) { System.out.println(m.group()); } } }
加载全部内容