// // Webbie.cs // // Author: // Bryan Allred // // Copyright (c) 2012 Bryan Allred // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. using System; using System.Diagnostics; using System.IO; using System.Net; namespace MediaWikiCrawl { /// /// Webbie. /// public class Webbie { /// /// Download the specified uri. /// /// /// URI. /// public static byte[] Download(Uri uri) { if (uri == null) { throw new ArgumentNullException("uri"); } try { // Configure the request. var request = WebRequest.Create(uri) as HttpWebRequest; request.UserAgent = @"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"; request.KeepAlive = false; request.Timeout = 15 * 1000; using (var response = request.GetResponse().GetResponseStream()) { if (request.HaveResponse && response != null) { var memoryStream = new MemoryStream(); byte[] buffer = new byte[0x1000]; int bytes; while ((bytes = response.Read(buffer, 0, buffer.Length)) > 0) { memoryStream.Write(buffer, 0, bytes); } return memoryStream.ToArray(); } } } catch (WebException wex) { if (wex.Response != null) { using (var errorResponse = wex.Response as HttpWebResponse) { Debug.WriteLine( "[{1}] {0}", errorResponse.StatusDescription, errorResponse.StatusCode); } } } return null; } /// /// Get the specified uri. /// /// /// URI. /// public static string Get(Uri uri) { if (uri == null) { throw new ArgumentNullException("uri"); } var html = string.Empty; try { // Configure the request. var request = WebRequest.Create(uri) as HttpWebRequest; request.UserAgent = @"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"; request.KeepAlive = false; request.Timeout = 15 * 1000; using (var response = request.GetResponse() as HttpWebResponse) { if (request.HaveResponse && response != null) { using (var stream = new StreamReader(response.GetResponseStream())) { html = stream.ReadToEnd(); } } } } catch (WebException wex) { if (wex.Response != null) { using (var errorResponse = wex.Response as HttpWebResponse) { Debug.WriteLine( "[{1}] {0}", errorResponse.StatusDescription, errorResponse.StatusCode); } } } return html; } /// /// Sanitize the specified input. /// /// /// Input. /// public static string Sanitize(string input) { // TODO: Provide proper sanitization!!!! return input; } } }