3月30th

http协议-C#抓取网页源代码方法

DIY编程技术我来说两句!

记得很久以前网上有个人问我如何抓取网页的源代码,当时我不会。今天在阅读杜亮编写的《亲密接触ASP.NET 2.0》一书的时候,突然发现这么个东西,而且简单得让我惊讶。于是我疯狂地把那个代码敲上屏幕,然后自己修改一下,果然可以抓取源代码。
C#获取指定网页HTML源代码可使用 WebClient、WebRequest、HttpWebRequest 三种方式来实现,当然也可使用 WebBrowser 控件。

通过http协议C#获取网页源代码

方法一:

using System.Text;

using System.Net;

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns it as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The raw response bytes decoded with <see cref="Encoding.Default"/>.</returns>
private string getHtml(string url)
{
    // WebClient is disposable; the using block releases it even when the download throws.
    using (WebClient myWebClient = new WebClient())
    {
        byte[] myDataBuffer = myWebClient.DownloadData(url);
        return Encoding.Default.GetString(myDataBuffer);
    }
}

方法二:

/// <summary>
/// Fetches a page with the MSXML2 XMLHTTP COM object, sending a Referer header.
/// </summary>
/// <param name="HttpUrl">Address to request with a synchronous GET.</param>
/// <param name="RefererUrl">Value sent in the Referer request header.</param>
/// <returns>
/// The response body decoded with <see cref="Encoding.Default"/>, or an empty
/// string when any step of the request throws.
/// </returns>
public string getHttp(string HttpUrl,string RefererUrl)
{
    string html = "";

    try
    {
        MSXML2.XMLHTTP Http = new MSXML2.XMLHTTPClass();

        // Third argument false => synchronous call; no user name / password.
        Http.open("GET", HttpUrl, false, null, null);
        Http.setRequestHeader("Referer", RefererUrl);
        Http.setRequestHeader("Content-Type", "text/html;charset=gb2312");
        Http.send("");

        html = Encoding.Default.GetString((byte[])Http.responseBody);
    }
    catch
    {
        // Deliberate best effort: callers receive "" when the request fails.
    }

    return html;
}

/// <summary>
/// Sends a POST with an empty body to <paramref name="strURL"/> and reads the
/// response as GB2312 text.
/// </summary>
/// <param name="strURL">Address to request.</param>
/// <param name="buf">
/// On success, the response lines concatenated without line terminators;
/// on failure, the message of the exception that was caught.
/// </param>
/// <returns><c>true</c> when the response was read; <c>false</c> when an exception occurred.</returns>
public bool getweb(string strURL,out string buf)
{
    buf = "";

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
        request.Method = "POST";
        request.ContentType = "text/html;charset=gb2312";

        // No parameters are sent, so the URL-encoded payload is the empty string.
        string paraUrlCoded = System.Web.HttpUtility.UrlEncode("");
        byte[] payload = System.Text.Encoding.GetEncoding("GB2312").GetBytes(paraUrlCoded);
        request.ContentLength = payload.Length;

        using (Stream writer = request.GetRequestStream())
        {
            writer.Write(payload, 0, payload.Length);
        }

        // Dispose the response, its stream and the reader even if reading throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream s = response.GetResponseStream())
        using (StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding("GB2312")))
        {
            // StringBuilder avoids re-copying the whole page for every line read.
            System.Text.StringBuilder html = new System.Text.StringBuilder();
            string sLine;
            while ((sLine = objReader.ReadLine()) != null)
            {
                html.Append(sLine);
            }

            buf = html.ToString();
        }

        return true;
    }
    catch (Exception x)
    {
        buf = x.Message;
        return false;
    }
}

 

带Cookie:

// Cookie container attached to the requests made by the getweb overloads below.
CookieContainer cc = new CookieContainer();

/// <summary>
/// Sends a POST with an empty body to <paramref name="strURL"/>, attaching the
/// <c>cc</c> cookie container, and reads the response as GB2312 text.
/// </summary>
/// <param name="strURL">Address to request.</param>
/// <param name="buf">
/// On success, the response lines concatenated without line terminators;
/// on failure, the message of the exception that was caught.
/// </param>
/// <returns><c>true</c> when the response was read; <c>false</c> when an exception occurred.</returns>
public bool getweb(string strURL,out string buf)
{
    buf = "";

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);
        request.Method = "POST";
        request.ContentType = "text/html;charset=gb2312";

        // Attach the container before the request starts so the cookies held
        // in cc accompany it.
        request.CookieContainer = cc;

        // No parameters are sent, so the URL-encoded payload is the empty string.
        string paraUrlCoded = System.Web.HttpUtility.UrlEncode("");
        byte[] payload = System.Text.Encoding.GetEncoding("GB2312").GetBytes(paraUrlCoded);
        request.ContentLength = payload.Length;

        using (Stream writer = request.GetRequestStream())
        {
            writer.Write(payload, 0, payload.Length);
        }

        // Dispose the response, its stream and the reader even if reading throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream s = response.GetResponseStream())
        using (StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding("GB2312")))
        {
            // StringBuilder avoids re-copying the whole page for every line read.
            System.Text.StringBuilder html = new System.Text.StringBuilder();
            string sLine;
            while ((sLine = objReader.ReadLine()) != null)
            {
                html.Append(sLine);
            }

            buf = html.ToString();
        }

        return true;
    }
    catch (Exception x)
    {
        buf = x.Message;
        return false;
    }
}

  public bool getweb(string strURL,out string buf,string postData)

  {

   buf="";

   try

   {       

    ASCIIEncoding encoding = new ASCIIEncoding();

    byte[] data = encoding.GetBytes(postData);

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strURL);

    request.Method = "POST";

    request.ContentType = "application/x-www-form-urlencoded";

    request.ContentLength = data.Length;

    Stream newStream = request.GetRequestStream();

    newStream.Write(data, 0, data.Length);

    newStream.Close();

                   

    request.CookieContainer = cc;

                   

    HttpWebResponse response = (HttpWebResponse)request.GetResponse();

    cc.Add(response.Cookies);

    Stream stream = response.GetResponseStream();

    string sHtml = new StreamReader(stream, System.Text.Encoding.Default).ReadToEnd();

    buf=sHtml;

    return true;

   }

   catch (Exception x)

   {   

    buf=x.Message.ToString();

    return false;    

   }

  }


/// <summary>
/// Downloads <paramref name="url"/> with <see cref="WebClient.DownloadData(string)"/>,
/// passes the decoded text to <c>saveSourceCode</c>, and returns it.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response bytes decoded with <see cref="Encoding.Default"/>.</returns>
private string getWebresourceFile1(string url)
{
    // The using block releases the WebClient even when the download throws.
    using (WebClient myWebClient = new WebClient())
    {
        byte[] myDataBuffer = myWebClient.DownloadData(url);
        string SourceCode = Encoding.Default.GetString(myDataBuffer);
        saveSourceCode(SourceCode);
        return SourceCode;
    }
}

方法2

/// <summary>
/// Requests <paramref name="url"/> with an HTTP GET, passes the decoded text to
/// <c>saveSourceCode</c>, and returns it.
/// </summary>
/// <param name="url">Address of the page to request.</param>
/// <returns>The response body decoded with <see cref="Encoding.Default"/>.</returns>
private string getWebresourceFile2(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

    // The method has to be configured before the request is sent, and the
    // verb must not carry trailing whitespace.
    request.Method = "GET";

    // Dispose the response, its stream and the reader even if reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream receiveStream = response.GetResponseStream())
    using (StreamReader readStream = new StreamReader(receiveStream, Encoding.Default))
    {
        string SourceCode = readStream.ReadToEnd();
        saveSourceCode(SourceCode);
        return SourceCode;
    }
}

方法3

/// <summary>
/// Downloads <paramref name="url"/> using the default credentials, passes the
/// decoded text to <c>saveSourceCode</c>, and returns it.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response bytes decoded with <see cref="Encoding.Default"/>.</returns>
private string getWebresourceFile3(string url)
{
    // The using block disposes the client on every path, including exceptions.
    using (WebClient wc = new WebClient())
    {
        wc.Credentials = CredentialCache.DefaultCredentials;
        byte[] pageData = wc.DownloadData(url);
        string SourceCode = Encoding.Default.GetString(pageData);
        saveSourceCode(SourceCode);
        return SourceCode;
    }
}

方法4

/// <summary>
/// Opens a read stream on <paramref name="url"/> using the default credentials,
/// passes the decoded text to <c>saveSourceCode</c>, and returns it.
/// </summary>
/// <param name="url">Address of the page to read.</param>
/// <returns>The response body decoded with <see cref="System.Text.Encoding.Default"/>.</returns>
private string getWebresourceFile4(string url)
{
    using (WebClient wc = new WebClient())
    {
        wc.Credentials = CredentialCache.DefaultCredentials;

        // Stream and reader are disposed on every path, including exceptions.
        using (Stream resStream = wc.OpenRead(url))
        using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
        {
            string SourceCode = sr.ReadToEnd();
            saveSourceCode(SourceCode);
            return SourceCode;
        }
    }
}

方法5

/// <summary>
/// Requests <paramref name="url"/> through the generic <see cref="WebRequest"/> API,
/// passes the decoded text to <c>saveSourceCode</c>, and returns it.
/// </summary>
/// <param name="url">Address of the page to request.</param>
/// <returns>The response body decoded with <see cref="System.Text.Encoding.Default"/>.</returns>
private string getWebresourceFile5(string url)
{
    WebRequest request = WebRequest.Create(url);

    // Dispose the response, its stream and the reader even if reading throws.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        string SourceCode = sr.ReadToEnd();
        saveSourceCode(SourceCode);
        return SourceCode;
    }
}

本文出自:DIY博客园,链接:https://www.diybloghome.com/prology/185.html,转载请注明!