Go homepage(回首页) Upload pictures (上传图片) Write articles (发文字帖)
The author:(作者)delv published in(发表于) 2014/1/24 9:02:09 .NET2.0抓取网页全部链接_[Asp.Net教程]
经对各大门户网站测试,该方法的链接抓取率为100%!
效果图
后台代码:
using System;using System.Data;using System.Configuration;using System.Web;using System.Web.Security;using System.Web.UI;using System.Web.UI.WebControls;using System.Web.UI.WebControls.WebParts;using System.Web.UI.HtmlControls;using System.Text.RegularExpressions;using System.Net;using System.IO;using System.Collections;
/// <summary>
/// Code-behind for the link-extraction page: downloads the page whose address
/// is typed into <c>TextBox1</c> and lists every distinct absolute
/// <c>http://</c> URL found in its markup in <c>TextBox2</c>.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Matches absolute <c>http://</c> URLs (host plus an optional path/query).
    /// Other schemes such as <c>https://</c> are not matched by this pattern.
    /// </summary>
    private static readonly Regex LinkPattern =
        new Regex(@"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?", RegexOptions.IgnoreCase);

    /// <summary>
    /// Page load handler. Nothing needs to be initialised on first load.
    /// </summary>
    /// <param name="sender">The page raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        if (!IsPostBack)
        {
        }
    }

    /// <summary>
    /// Fetches the URL entered in <c>TextBox1</c>, extracts every link matching
    /// <see cref="LinkPattern"/> and writes each distinct link, in order of first
    /// appearance and followed by a newline, to <c>TextBox2</c>.
    /// </summary>
    /// <param name="sender">The button raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        // Clear the previous result first so a failed request leaves the box empty.
        TextBox2.Text = "";

        string web_url = this.TextBox1.Text; // e.g. "http://blog.csdn.net/21aspnet/"
        string all_code;

        // The cast restricts the request to HTTP(S); other schemes fail here.
        HttpWebRequest all_codeRequest = (HttpWebRequest)WebRequest.Create(web_url);

        // Dispose the response and reader even when reading throws, so the
        // underlying connection is always released.
        using (WebResponse all_codeResponse = all_codeRequest.GetResponse())
        using (StreamReader the_Reader = new StreamReader(all_codeResponse.GetResponseStream()))
        {
            all_code = the_Reader.ReadToEnd();
        }

        MatchCollection mc = LinkPattern.Matches(all_code);

        // Links already emitted. The original never recorded them, so its
        // duplicate filter could not match anything.
        Hashtable seen = new Hashtable();
        System.Text.StringBuilder output = new System.Text.StringBuilder();

        foreach (Match match in mc)
        {
            string name = match.Value;

            // Filter: skip links that were already listed.
            if (seen.ContainsKey(name))
            {
                continue;
            }

            seen.Add(name, null);
            output.Append(name).Append("\n");
        }

        TextBox2.Text = output.ToString();
    }
}
前台代码:
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>
赞