C#读取任意charset网页

[code='c#']
/// <summary>
/// Downloads a web page and decodes it using the character set declared in
/// the page's own markup (for example <c>&lt;meta charset="gbk"&gt;</c> or
/// <c>content="text/html; charset=gb2312"</c>). UTF-8 is used when the page
/// declares no character set.
/// </summary>
/// <param name="address">Absolute HTTP/HTTPS URL of the page to fetch.</param>
/// <returns>
/// The decoded page text, or <c>null</c> when the request fails, the address is
/// not a valid HTTP URL, or the declared character set is not known to the runtime.
/// </returns>
private string GetPageContent(string address)
{
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(address);
        req.Method = "GET";

        using (WebResponse wr = req.GetResponse())
        using (Stream stream = wr.GetResponseStream())
        using (MemoryStream buffer = new MemoryStream())
        {
            // Keep the raw bytes. Decoding with the wrong encoding first and
            // re-encoding afterwards is lossy: unmappable bytes turn into '?'.
            byte[] chunk = new byte[8192];
            int read;
            while ((read = stream.Read(chunk, 0, chunk.Length)) > 0)
            {
                buffer.Write(chunk, 0, read);
            }
            byte[] raw = buffer.ToArray();

            // ISO-8859-1 maps every byte to exactly one char, so the ASCII
            // markup that carries the charset declaration can be scanned
            // safely regardless of the page's real encoding.
            string probe = Encoding.GetEncoding("iso-8859-1").GetString(raw);
            string charset = ExtractCharset(probe);
            if (charset == null)
            {
                charset = "utf-8";
            }

            // Decode the original bytes exactly once with the declared encoding.
            // An unknown charset name throws here and is reported as null below.
            return Encoding.GetEncoding(charset).GetString(raw);
        }
    }
    catch
    {
        // Callers rely on null to signal "page could not be retrieved",
        // so every failure is deliberately mapped to null.
        return null;
    }
}

/// <summary>
/// Finds the first <c>charset=VALUE</c> declaration in the markup and returns VALUE.
/// </summary>
/// <param name="markup">Page text to scan.</param>
/// <returns>The declared charset name, or <c>null</c> when none is found.</returns>
private static string ExtractCharset(string markup)
{
    const string Key = "charset";
    int searchFrom = 0;

    while (searchFrom < markup.Length)
    {
        int keyIndex = markup.IndexOf(Key, searchFrom, System.StringComparison.OrdinalIgnoreCase);
        if (keyIndex < 0)
        {
            return null;
        }

        int pos = keyIndex + Key.Length;
        searchFrom = pos;

        while (pos < markup.Length && char.IsWhiteSpace(markup[pos]))
        {
            pos++;
        }

        // The word "charset" not followed by '=' is not a declaration; keep looking.
        if (pos >= markup.Length || markup[pos] != '=')
        {
            continue;
        }
        pos++;

        // Skip whitespace and an optional opening quote before the value.
        while (pos < markup.Length
            && (char.IsWhiteSpace(markup[pos]) || markup[pos] == '"' || markup[pos] == '\''))
        {
            pos++;
        }

        // The value ends at the first character that cannot be part of a
        // charset name (closing quote, space, ';', '/', '>' and so on).
        int start = pos;
        while (pos < markup.Length && IsCharsetNameChar(markup[pos]))
        {
            pos++;
        }

        if (pos > start)
        {
            return markup.Substring(start, pos - start);
        }
    }

    return null;
}

/// <summary>
/// Tells whether <paramref name="c"/> may appear inside a charset name such as
/// "utf-8", "gb2312", "iso-8859-1" or "windows-1252".
/// </summary>
private static bool IsCharsetNameChar(char c)
{
    return (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || c == '-' || c == '_' || c == '.' || c == ':' || c == '+';
}
[/code]

发表评论