Uncategorized

Download page from web with encoding

Usually you can use WebClient to download a page from the web, but it is not always a good choice: sometimes the page's encoding is different. With this code you can download such pages without problems!

#region ” Download page “
/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes its bytes to text
/// using the character set declared by the response or by the document itself.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response body decoded to a string (UTF-8 when no usable charset is found).</returns>
/// <exception cref="WebException">
/// The request failed, or the response body could not be read.
/// </exception>
public static string PageDownload(string url)
{
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

    using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
    {
        byte[] buffer;

        using (Stream s = resp.GetResponseStream())
        {
            buffer = ReadStream(s);
        }

        // ReadStream reports a failed read as null; surface that as a download
        // error instead of letting it fail later as an unrelated null argument.
        if (buffer == null)
            throw new WebException("Unable to read the response body from " + url, WebExceptionStatus.ReceiveFailure);

        // Note: resp.ContentEncoding is deliberately NOT consulted. It carries the
        // Content-Encoding header (e.g. "gzip"), which is a transfer compression
        // method, not a character set.
        string pageEncoding = "";

        // 1. Explicit charset in the Content-Type header.
        if (!string.IsNullOrEmpty(resp.ContentType))
            pageEncoding = GetCharacterSet(resp.ContentType);

        // 2. Charset declared inside the document (e.g. a meta tag).
        if (string.IsNullOrEmpty(pageEncoding))
            pageEncoding = GetCharacterSet(buffer);

        // 3. Whatever the framework reports. Checked last because this property
        //    may hold a framework default rather than a declared value - TODO confirm
        //    against the target runtime.
        if (string.IsNullOrEmpty(pageEncoding) && !string.IsNullOrEmpty(resp.CharacterSet))
            pageEncoding = resp.CharacterSet;

        Encoding e = Encoding.UTF8;
        if (!string.IsNullOrEmpty(pageEncoding))
        {
            try
            {
                e = Encoding.GetEncoding(pageEncoding);
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: report it and keep UTF-8.
                MessageBox.Show("Invalid encoding: " + pageEncoding);
            }
        }

        return e.GetString(buffer);
    }
}

/// <summary>
/// Extracts the value of the last <c>charset=...</c> declaration found in
/// <paramref name="s"/> (a Content-Type header value or document text).
/// </summary>
/// <param name="s">Text to search; may be null or empty.</param>
/// <returns>
/// The charset name in upper case, or an empty string when no declaration
/// (or no value) is found.
/// </returns>
private static string GetCharacterSet(string s)
{
    if (string.IsNullOrEmpty(s))
        return "";

    // Invariant casing: culture-sensitive ToUpper() would turn "iso-8859-9"
    // into a name with a dotted capital I under Turkish cultures.
    s = s.ToUpperInvariant();
    int start = s.LastIndexOf("CHARSET", StringComparison.Ordinal);
    if (start == -1)
        return "";

    start = s.IndexOf('=', start);
    if (start == -1)
        return "";

    s = s.Substring(start + 1).Trim();

    // The value itself may be quoted (charset="utf-8"); skip the opening quote
    // so it is not mistaken for the terminator.
    if (s.Length > 0 && (s[0] == '"' || s[0] == '\''))
        s = s.Substring(1).TrimStart();

    // The value ends at the first delimiter that cannot be part of a charset name.
    int end = s.IndexOfAny(new char[] { ';', '"', '\'', '/', '>', ' ', '\t', '\r', '\n' });
    if (end == -1)
        end = s.Length;

    return s.Substring(0, end).Trim();
}

/// <summary>
/// Looks for a <c>charset=...</c> declaration inside raw, not yet decoded content.
/// </summary>
/// <param name="data">Raw bytes of the document; may be null or empty.</param>
/// <returns>
/// The result of the string overload applied to the ASCII view of
/// <paramref name="data"/>, or an empty string when there is no data.
/// </returns>
private static string GetCharacterSet(byte[] data)
{
    if (data == null || data.Length == 0)
        return "";

    // A charset declaration is plain ASCII. Decoding as ASCII maps every byte to
    // exactly one char, so the result does not depend on the machine's default
    // code page and multi-byte sequences cannot absorb neighbouring ASCII bytes.
    string s = Encoding.ASCII.GetString(data);
    return GetCharacterSet(s);
}

/// <summary>
/// Reads <paramref name="s"/> until it reports no more data and returns
/// everything that was read.
/// </summary>
/// <param name="s">Stream to drain.</param>
/// <returns>
/// The bytes read (an empty array for an empty stream), or <c>null</c> when any
/// exception is thrown while reading, including when <paramref name="s"/> is null.
/// </returns>
private static byte[] ReadStream(Stream s)
{
    try
    {
        byte[] buffer = new byte[8096];
        using (MemoryStream ms = new MemoryStream())
        {
            int read;
            // Read returns 0 (or less) once the end of the stream is reached.
            while ((read = s.Read(buffer, 0, buffer.Length)) > 0)
            {
                ms.Write(buffer, 0, read);
            }

            return ms.ToArray();
        }
    }
    catch (Exception)
    {
        // Callers of this helper receive null as the failure signal.
        return null;
    }
}
#endregion

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.