Skip to content

C#读取任意charset网页

/// <summary>
/// Downloads the page at <paramref name="address"/> and decodes it using the
/// character set the page declares.
/// </summary>
/// <remarks>
/// The raw response bytes are read first and decoded exactly once. The charset is
/// taken from the first <c>charset=...</c> declaration found in the body (for
/// example in a <c>&lt;meta&gt;</c> tag); if the body declares none, the
/// <c>Content-Type</c> response header is consulted, and "utf-8" is the final
/// fallback. A byte-order mark, when present, overrides the declared charset.
/// </remarks>
/// <param name="address">Absolute HTTP/HTTPS URL of the page to fetch.</param>
/// <returns>
/// The decoded page text, or <c>null</c> if the request fails, the address is
/// invalid, or the declared charset is not supported by the runtime.
/// </returns>
private string GetPageContent(string address)
{
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(address);
        req.Method = "GET";

        byte[] body;
        string headerCharset;
        using (WebResponse wr = req.GetResponse())
        using (Stream stream = wr.GetResponseStream())
        using (MemoryStream buffer = new MemoryStream())
        {
            // Keep the undecoded bytes: decoding with a guessed encoding and
            // re-encoding afterwards is lossy and corrupts non-ASCII text.
            stream.CopyTo(buffer);
            body = buffer.ToArray();
            headerCharset = FindCharset(wr.ContentType);
        }

        // ISO-8859-1 maps every byte to exactly one char, so the ASCII
        // "charset=..." declaration can be located without knowing the real encoding.
        string bodyCharset = FindCharset(Encoding.GetEncoding("iso-8859-1").GetString(body));
        string charset = bodyCharset ?? headerCharset ?? "utf-8";

        // detectEncodingFromByteOrderMarks: true strips a BOM and lets it win
        // over the declared charset.
        using (StreamReader reader = new StreamReader(
            new MemoryStream(body), Encoding.GetEncoding(charset), true))
        {
            return reader.ReadToEnd();
        }
    }
    catch
    {
        // Deliberate best-effort contract: callers receive null on any failure
        // (bad URI, network error, unsupported charset name).
        return null;
    }
}

/// <summary>
/// Returns the value of the first <c>charset=value</c> declaration in
/// <paramref name="text"/>, or <c>null</c> when there is none.
/// </summary>
private static string FindCharset(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    const string Key = "charset";
    int searchFrom = 0;
    while (true)
    {
        int keyIndex = text.IndexOf(Key, searchFrom, System.StringComparison.OrdinalIgnoreCase);
        if (keyIndex < 0)
        {
            return null;
        }

        int pos = keyIndex + Key.Length;
        searchFrom = pos;

        pos = SkipWhitespace(text, pos);
        if (pos >= text.Length || text[pos] != '=')
        {
            // The word "charset" without '=' is not a declaration; keep looking.
            continue;
        }

        pos = SkipWhitespace(text, pos + 1);
        if (pos < text.Length && (text[pos] == '"' || text[pos] == '\''))
        {
            pos = SkipWhitespace(text, pos + 1);
        }

        int start = pos;
        while (pos < text.Length && IsCharsetNameChar(text[pos]))
        {
            pos++;
        }

        if (pos > start)
        {
            return text.Substring(start, pos - start);
        }
    }
}

/// <summary>Advances <paramref name="pos"/> past any whitespace in <paramref name="text"/>.</summary>
private static int SkipWhitespace(string text, int pos)
{
    while (pos < text.Length && char.IsWhiteSpace(text[pos]))
    {
        pos++;
    }

    return pos;
}

/// <summary>True for the ASCII characters that may appear in an encoding name.</summary>
private static bool IsCharsetNameChar(char c)
{
    return (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || c == '-' || c == '_' || c == '.' || c == ':';
}