Aim: To download a website's source using a console application. The class used is shown in the program below.
Question: I use the code below to download the data (source) of a web page. Imagine you are using Chrome: if you first enter the query-string URL below, the web page itself redirects you to a view HTML page where you see the data.
- After this URL is entered, the page redirects itself to the second page below to show the results. The redirect is done with JavaScript.
www.xyz.com/aaa.html?search=aaa&id=1
- it redirects here: www.xyz.com/ViewResult.html
In a browser, it works fine. I see 4 HTML tables inside the page when I use Google Chrome's "View source" option. But in my application I see only two of the 4 tables. Two tables inside the web page are missing (the missing two are the second and third).
How can I overcome this problem? I want to get the source of the page as I see it in Chrome.
Bonus information: There is no iframe.
The relevant code:
// Usage example: request the search URL first, then request the result view page
// and keep the HTML of the second response.
// NOTE(review): both URLs are written without a scheme ("http://"); presumably the
// string overloads of RequestPage normalize them — confirm against those overloads.
string url = "www.xyz.com/aaa.html?search=aaa&id=1";
// First request: its WebPage result is overwritten below and never read here.
// Presumably it is issued so that cookies set by the search page are captured — verify.
WebPage pG = ss.RequestPage(url, "", "GET");
// Second request: fetch the result view page directly.
pG = ss.RequestPage("www.xyz.com/ViewResult.html");
// Html is the page source as returned for the second request.
string source= pG.Html;
/// <summary>
/// Sends an HTTP request to <paramref name="url"/>, records any cookies the server sets,
/// and returns the response body wrapped in a <c>WebPage</c>. When the response carries a
/// <c>Location</c> header the target is requested as well and the returned page wraps both
/// the final HTML and the intermediate response.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="content">Request body; only sent when <paramref name="method"/> is "POST".</param>
/// <param name="method">HTTP method, e.g. "GET" or "POST".</param>
/// <param name="contentType">Content-Type header value; only applied for "POST".</param>
/// <returns>
/// A <c>WebPage</c> built from the response HTML, or from the redirect target's HTML plus
/// the intermediate page when a <c>Location</c> header was present.
/// </returns>
/// <exception cref="WebException">
/// Rethrown once the request has failed <c>PageReattempts</c> + 1 times.
/// </exception>
public WebPage RequestPage(Uri url, string content, string method, string contentType)
{
    HttpWebResponse response = SendWithRetries(url, content, method, contentType);

    string htmlResult;
    string newLocation;
    try
    {
        UpdateCookiesFromResponse(response);

        // Read the header before the body is consumed and the response is closed;
        // header access on a closed response is not guaranteed to work.
        newLocation = response.Headers["Location"];
        htmlResult = ReadResponseStream(response);
    }
    finally
    {
        // Always release the connection, even if reading the body throws.
        response.Close();
    }

    if (newLocation != null)
    {
        // AllowAutoRedirect is off, so the redirect is followed manually after a short pause.
        Thread.Sleep(1500);
        WebPage result = RequestPage(newLocation);
        return new WebPage(result.Html, new WebPage(htmlResult));
    }

    LastUrl = url.ToString();
    return new WebPage(htmlResult);
}

// Issues the request, retrying on WebException up to PageReattempts additional times.
private HttpWebResponse SendWithRetries(Uri url, string content, string method, string contentType)
{
    int attempts = 0;
    while (true)
    {
        try
        {
            // A fresh request is built for every attempt: calling GetResponse() again on
            // the same HttpWebRequest does not reissue the request, and a POST body
            // stream cannot be written twice.
            HttpWebRequest request = CreateConfiguredRequest(url, content, method, contentType);
            return (HttpWebResponse)request.GetResponse();
        }
        catch (WebException ex)
        {
            if (attempts == PageReattempts)
            {
                // Leave ex.Response open so the caller can still inspect the error body.
                throw;
            }

            // Release the failed attempt's connection before retrying.
            if (ex.Response != null)
            {
                ex.Response.Close();
            }

            // Wait three seconds before trying again
            Thread.Sleep(3000);
        }

        attempts += 1;
    }
}

// Builds an HttpWebRequest with the standard headers, stored cookies and (for POST) the body.
private HttpWebRequest CreateConfiguredRequest(Uri url, string content, string method, string contentType)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Proxy = Proxy;
    request.Timeout = 60000;
    request.Method = method;
    request.AllowAutoRedirect = false; // redirects are followed manually by RequestPage
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    request.Referer = LastUrl;
    request.KeepAlive = true;
    request.UserAgent = UserAgent;
    request.Headers.Add("Accept-Language", "en-us,en;q=0.5");
    request.Headers.Add("Cache-Control", "no-cache");
    // NOTE(review): compressed responses are requested but AutomaticDecompression is not
    // set here; presumably ReadResponseStream decompresses — confirm.
    request.Headers.Add("Accept-Encoding", "gzip,deflate");

    string cookieHeader = BuildCookieHeader();
    if (cookieHeader.Length > 0)
    {
        request.Headers.Add("Cookie", cookieHeader);
    }

    if (method == "POST")
    {
        // NOTE(review): ASCII turns non-ASCII characters into '?'; kept for
        // compatibility with existing callers — consider UTF-8 if bodies can be non-ASCII.
        byte[] contentData = Encoding.ASCII.GetBytes(content ?? "");
        request.ContentLength = contentData.Length;
        request.ContentType = contentType;
        using (Stream contentWriter = request.GetRequestStream())
        {
            contentWriter.Write(contentData, 0, contentData.Length);
        }
    }

    return request;
}

// Joins the stored cookies into a "name=value;name=value" Cookie header value.
private string BuildCookieHeader()
{
    List<string> pairs = new List<string>();
    foreach (KeyValuePair<String, String> cookiePair in Cookies)
    {
        // A cookie without a name would produce a malformed "=value" entry.
        if (!string.IsNullOrEmpty(cookiePair.Key))
        {
            pairs.Add(cookiePair.Key + "=" + cookiePair.Value);
        }
    }

    return string.Join(";", pairs.ToArray());
}

// Parses the Set-Cookie header of the response and stores each name/value in Cookies.
private void UpdateCookiesFromResponse(HttpWebResponse response)
{
    string setCookie = response.Headers["Set-Cookie"];
    if (setCookie == null)
    {
        return;
    }

    // Multiple Set-Cookie headers arrive comma-joined; flatten them into one ';' list.
    // NOTE(review): cookie attributes (e.g. expires, domain) are stored as if they were
    // cookies, as before — a stricter parser would filter them out.
    string flattened = setCookie.Replace("path=/,", ";").Replace("HttpOnly,", "");
    foreach (string part in flattened.Split(';'))
    {
        // Split on the first '=' only so values containing '=' (e.g. base64) stay intact.
        int separator = part.IndexOf('=');
        string cookieKey = separator >= 0 ? part.Substring(0, separator).Trim() : part.Trim();
        string cookieValue = separator >= 0 ? part.Substring(separator + 1).Trim() : "";

        if (cookieKey.Length == 0)
        {
            continue;
        }

        Cookies[cookieKey] = cookieValue;
    }
}
The `content` property provided by PhantomJS: phantomjs.org/api/webpage/property/content.html