瀏覽代碼

Initial commit. Currently the CLI will pull and store all images within the specified MediaWiki.

bmallred 12 年之前
父節點
當前提交
49d8475cf9

+ 19 - 0
MIT.txt

@ -0,0 +1,19 @@
1
Copyright (c) 2012 Bryan M. Allred
2

3
Permission is hereby granted, free of charge, to any person obtaining a copy 
4
of this software and associated documentation files (the "Software"), to 
5
deal in the Software without restriction, including without limitation the 
6
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 
7
sell copies of the Software, and to permit persons to whom the Software is 
8
furnished to do so, subject to the following conditions:
9

10
The above copyright notice and this permission notice shall be included in all 
11
copies or substantial portions of the Software.
12

13
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
14
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
15
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
16
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
17
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
18
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
19
DEALINGS IN THE SOFTWARE.

+ 27 - 0
MediaWikiCrawl.Tests/AssemblyInfo.cs

@ -0,0 +1,27 @@
1
using System.Reflection;
2
using System.Runtime.CompilerServices;
3
4
// Information about this assembly is defined by the following attributes. 
5
// Change them to the values specific to your project.
6
7
[assembly: AssemblyTitle("MediaWikiCrawl.Tests")]
8
[assembly: AssemblyDescription("")]
9
[assembly: AssemblyConfiguration("")]
10
[assembly: AssemblyCompany("")]
11
[assembly: AssemblyProduct("")]
12
[assembly: AssemblyCopyright("Bryan Allred")]
13
[assembly: AssemblyTrademark("")]
14
[assembly: AssemblyCulture("")]
15
16
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
17
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
18
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
19
20
[assembly: AssemblyVersion("1.0.*")]
21
22
// The following attributes are used to specify the signing key for the assembly, 
23
// if desired. See the Mono documentation for more information about signing.
24
25
//[assembly: AssemblyDelaySign(false)]
26
//[assembly: AssemblyKeyFile("")]
27

+ 82 - 0
MediaWikiCrawl.Tests/MediaWikiApiTests.cs

@ -0,0 +1,82 @@
1
//
2
// MediaWikiApiTests.cs
3
//
4
// Author:
5
//       Bryan Allred <bryan.allred@gmail.com>
6
//
7
// Copyright (c) 2012 Bryan Allred
8
//
9
// Permission is hereby granted, free of charge, to any person obtaining a copy
10
// of this software and associated documentation files (the "Software"), to deal
11
// in the Software without restriction, including without limitation the rights
12
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
// copies of the Software, and to permit persons to whom the Software is
14
// furnished to do so, subject to the following conditions:
15
//
16
// The above copyright notice and this permission notice shall be included in
17
// all copies or substantial portions of the Software.
18
//
19
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
// THE SOFTWARE.
26
using System;
27
using System.Diagnostics;
28
using NUnit.Framework;
29
using MediaWikiCrawl;
30
31
namespace MediaWikiCrawl.Tests
32
{
33
	/// <summary>
	/// Integration tests for <see cref="MediaWikiApi"/>. They query the live wiki
	/// configured in <see cref="SetUp"/> and therefore need network access.
	/// </summary>
	[TestFixture]
	public class MediaWikiApiTests
	{
		/// <summary>
		/// Upper bound on the images enumerated by <see cref="CanGetAllImages"/>; enough to
		/// prove that enumeration works without walking the remainder of the wiki.
		/// </summary>
		private const int MaxImagesToEnumerate = 25;

		/// <summary>
		/// Address of the wiki API the tests run against.
		/// </summary>
		private string _baseUri;

		/// <summary>
		/// Sets the wiki address once for the whole fixture.
		/// </summary>
		[TestFixtureSetUp]
		public void SetUp()
		{
			this._baseUri = @"http://wiki.guildwars2.com/api.php";
		}

		/// <summary>
		/// Enumerating the image list yields at least one image.
		/// </summary>
		[Test]
		public void CanGetAllImages()
		{
			var count = 0;
			var wikiApi = new MediaWikiApi(this._baseUri);

			foreach (var img in wikiApi.AllImages("z"))
			{
				count++;
				Debug.WriteLine(
					"{0}{1}{2}  {3}",
					count.ToString().PadRight(20),
					img.Name,
					Environment.NewLine,
					img.Url);

				// The enumeration is lazy, so leaving the loop also stops any further
				// requests from being made.
				if (count >= MaxImagesToEnumerate)
				{
					break;
				}
			}

			Assert.That(count, Is.GreaterThan(0));
		}

		/// <summary>
		/// Looking up a known file page returns an image that carries a download address.
		/// </summary>
		[Test]
		public void CanGetWikiImage()
		{
			var wikiApi = new MediaWikiApi(this._baseUri);
			var result = wikiApi.PageImage("File:Hall of Monuments background.jpg");
			Assert.That(result, Is.InstanceOf<WikiImage>());
			Assert.That(result.Url, Is.Not.Null);
		}

		/// <summary>
		/// Looking up a page returns its image titles, none of which is null or empty.
		/// </summary>
		[Test]
		public void CanGetWikiPage()
		{
			var wikiApi = new MediaWikiApi(this._baseUri);
			var result = wikiApi.Page("Main Page");
			Assert.That(result, Is.InstanceOf<WikiPage>());
			Assert.That(result.ImageTitles, Is.Not.Null);
			Assert.That(result.ImageTitles, Has.None.Null.Or.Empty);
		}
	}
82
}

+ 52 - 0
MediaWikiCrawl.Tests/MediaWikiCrawl.Tests.csproj

@ -0,0 +1,52 @@
1
<?xml version="1.0" encoding="utf-8"?>
2
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3
  <PropertyGroup>
4
    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5
    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
6
    <ProductVersion>10.0.0</ProductVersion>
7
    <SchemaVersion>2.0</SchemaVersion>
8
    <ProjectGuid>{C18684AF-C38D-4BCE-86FE-21F59463D5EA}</ProjectGuid>
9
    <OutputType>Library</OutputType>
10
    <RootNamespace>MediaWikiCrawl.Tests</RootNamespace>
11
    <AssemblyName>MediaWikiCrawl.Tests</AssemblyName>
12
  </PropertyGroup>
13
  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
14
    <DebugSymbols>true</DebugSymbols>
15
    <DebugType>full</DebugType>
16
    <Optimize>false</Optimize>
17
    <OutputPath>bin\Debug</OutputPath>
18
    <DefineConstants>DEBUG;</DefineConstants>
19
    <ErrorReport>prompt</ErrorReport>
20
    <WarningLevel>4</WarningLevel>
21
    <ConsolePause>false</ConsolePause>
22
  </PropertyGroup>
23
  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
24
    <DebugType>none</DebugType>
25
    <Optimize>true</Optimize>
26
    <OutputPath>bin\Release</OutputPath>
27
    <ErrorReport>prompt</ErrorReport>
28
    <WarningLevel>4</WarningLevel>
29
    <ConsolePause>false</ConsolePause>
30
  </PropertyGroup>
31
  <ItemGroup>
32
    <Reference Include="System" />
33
    <Reference Include="nunit.framework, Version=2.6.0.12051, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77">
34
      <Private>False</Private>
35
    </Reference>
36
    <Reference Include="nunit.core, Version=2.6.0.12051, Culture=neutral, PublicKeyToken=96d09a1eb7f44a77">
37
      <Private>False</Private>
38
    </Reference>
39
  </ItemGroup>
40
  <ItemGroup>
41
    <Compile Include="AssemblyInfo.cs" />
42
    <Compile Include="MediaWikiApiTests.cs" />
43
    <Compile Include="WebbieTests.cs" />
44
  </ItemGroup>
45
  <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
46
  <ItemGroup>
47
    <ProjectReference Include="..\MediaWikiCrawl\MediaWikiCrawl.csproj">
48
      <Project>{F06013A9-2F92-4AD7-9D2A-3545E85E9876}</Project>
49
      <Name>MediaWikiCrawl</Name>
50
    </ProjectReference>
51
  </ItemGroup>
52
</Project>

+ 44 - 0
MediaWikiCrawl.Tests/WebbieTests.cs

@ -0,0 +1,44 @@
1
//
2
// WebbieTests.cs
3
//
4
// Author:
5
//       Bryan Allred <bryan.allred@gmail.com>
6
//
7
// Copyright (c) 2012 Bryan Allred
8
//
9
// Permission is hereby granted, free of charge, to any person obtaining a copy
10
// of this software and associated documentation files (the "Software"), to deal
11
// in the Software without restriction, including without limitation the rights
12
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
// copies of the Software, and to permit persons to whom the Software is
14
// furnished to do so, subject to the following conditions:
15
//
16
// The above copyright notice and this permission notice shall be included in
17
// all copies or substantial portions of the Software.
18
//
19
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
// THE SOFTWARE.
26
using System;
27
using NUnit.Framework;
28
29
namespace MediaWikiCrawl.Tests
30
{
31
	/// <summary>
	/// Integration tests for <see cref="Webbie"/>. They fetch from the live wiki and
	/// therefore need network access.
	/// </summary>
	[TestFixture()]
	public class WebbieTests
	{
		/// <summary>
		/// Downloading a known image returns a non-empty byte array.
		/// </summary>
		[Test]
		public void CanDownloadImage()
		{
			var imageUrl = @"http://wiki.guildwars2.com/images/f/fe/Hall_of_Monuments_background.jpg";
			var result = Webbie.Download(new Uri(imageUrl));
			Assert.That(result, Is.Not.Null);
			Assert.That(result, Is.InstanceOf<byte[]>());

			// A length of zero would mean nothing was downloaded, so it must not pass.
			Assert.That(result, Has.Length.GreaterThan(0));
		}
	}
44
}

+ 49 - 0
MediaWikiCrawl.sln

@ -0,0 +1,49 @@
1

2
Microsoft Visual Studio Solution File, Format Version 11.00
3
# Visual Studio 2010
4
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MediaWikiCrawl", "MediaWikiCrawl\MediaWikiCrawl.csproj", "{F06013A9-2F92-4AD7-9D2A-3545E85E9876}"
5
EndProject
6
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MediaWikiCrawl.Tests", "MediaWikiCrawl.Tests\MediaWikiCrawl.Tests.csproj", "{C18684AF-C38D-4BCE-86FE-21F59463D5EA}"
7
EndProject
8
Global
9
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10
		Debug|x86 = Debug|x86
11
		Release|x86 = Release|x86
12
	EndGlobalSection
13
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
14
		{C18684AF-C38D-4BCE-86FE-21F59463D5EA}.Debug|x86.ActiveCfg = Debug|Any CPU
15
		{C18684AF-C38D-4BCE-86FE-21F59463D5EA}.Debug|x86.Build.0 = Debug|Any CPU
16
		{C18684AF-C38D-4BCE-86FE-21F59463D5EA}.Release|x86.ActiveCfg = Release|Any CPU
17
		{C18684AF-C38D-4BCE-86FE-21F59463D5EA}.Release|x86.Build.0 = Release|Any CPU
18
		{F06013A9-2F92-4AD7-9D2A-3545E85E9876}.Debug|x86.ActiveCfg = Debug|x86
19
		{F06013A9-2F92-4AD7-9D2A-3545E85E9876}.Debug|x86.Build.0 = Debug|x86
20
		{F06013A9-2F92-4AD7-9D2A-3545E85E9876}.Release|x86.ActiveCfg = Release|x86
21
		{F06013A9-2F92-4AD7-9D2A-3545E85E9876}.Release|x86.Build.0 = Release|x86
22
	EndGlobalSection
23
	GlobalSection(MonoDevelopProperties) = preSolution
24
		StartupItem = MediaWikiCrawl\MediaWikiCrawl.csproj
25
		Policies = $0
26
		$0.DotNetNamingPolicy = $1
27
		$1.DirectoryNamespaceAssociation = PrefixedHierarchical
28
		$1.ResourceNamePolicy = FileName
29
		$0.TextStylePolicy = $2
30
		$2.inheritsSet = null
31
		$2.scope = text/x-csharp
32
		$0.CSharpFormattingPolicy = $3
33
		$3.IndentSwitchBody = True
34
		$3.inheritsSet = Mono
35
		$3.inheritsScope = text/x-csharp
36
		$3.scope = text/x-csharp
37
		$0.TextStylePolicy = $4
38
		$4.FileWidth = 120
39
		$4.TabsToSpaces = False
40
		$4.inheritsSet = VisualStudio
41
		$4.inheritsScope = text/plain
42
		$4.scope = text/plain
43
		$0.StandardHeader = $5
44
		$5.Text = @\n${FileName}\n \nAuthor:\n      ${AuthorName} <${AuthorEmail}>\n\nCopyright (c) ${Year} ${CopyrightHolder}\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the "Software"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.
45
		$5.IncludeInNewFiles = True
46
		$0.VersionControlPolicy = $6
47
		$6.inheritsSet = Mono
48
	EndGlobalSection
49
EndGlobal

+ 27 - 0
MediaWikiCrawl/AssemblyInfo.cs

@ -0,0 +1,27 @@
1
using System.Reflection;
2
using System.Runtime.CompilerServices;
3
4
// Information about this assembly is defined by the following attributes. 
5
// Change them to the values specific to your project.
6
7
[assembly: AssemblyTitle("MediaWikiCrawl")]
8
[assembly: AssemblyDescription("")]
9
[assembly: AssemblyConfiguration("")]
10
[assembly: AssemblyCompany("")]
11
[assembly: AssemblyProduct("")]
12
[assembly: AssemblyCopyright("Bryan Allred")]
13
[assembly: AssemblyTrademark("")]
14
[assembly: AssemblyCulture("")]
15
16
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
17
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
18
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
19
20
[assembly: AssemblyVersion("1.0.*")]
21
22
// The following attributes are used to specify the signing key for the assembly, 
23
// if desired. See the Mono documentation for more information about signing.
24
25
//[assembly: AssemblyDelaySign(false)]
26
//[assembly: AssemblyKeyFile("")]
27

+ 97 - 0
MediaWikiCrawl/Main.cs

@ -0,0 +1,97 @@
1
//
2
// Main.cs
3
//
4
// Author:
5
//       Bryan Allred <bryan.allred@gmail.com>
6
//
7
// Copyright (c) 2012 Bryan Allred
8
//
9
// Permission is hereby granted, free of charge, to any person obtaining a copy
10
// of this software and associated documentation files (the "Software"), to deal
11
// in the Software without restriction, including without limitation the rights
12
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
// copies of the Software, and to permit persons to whom the Software is
14
// furnished to do so, subject to the following conditions:
15
//
16
// The above copyright notice and this permission notice shall be included in
17
// all copies or substantial portions of the Software.
18
//
19
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
// THE SOFTWARE.
26
using System;
27
using System.IO;
28
29
namespace MediaWikiCrawl
30
{
31
	/// <summary>
	/// Command-line entry point. Downloads every image listed by a MediaWiki
	/// installation and stores each one as a file named after the image.
	/// </summary>
	public class MainClass
	{
		/// <summary>
		/// Address of the wiki (or of its <c>api.php</c>) taken from the command line.
		/// </summary>
		private static string _baseUri = string.Empty;

		/// <summary>
		/// The entry point of the program, where the program control starts and ends.
		/// </summary>
		/// <param name='args'>
		/// The command-line arguments. No switches are recognised; every argument is
		/// treated as the wiki address and the last one supplied wins.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// No usable wiki address was supplied.
		/// </exception>
		public static void Main (string[] args)
		{
			// Go through any arguments.
			foreach (var arg in args)
			{
				_baseUri = arg;
			}

			// Validate inputs.
			if (string.IsNullOrWhiteSpace(_baseUri))
			{
				throw new ArgumentNullException("baseUri", "Missing the base URI!");
			}

			// Create a new API handler.
			var api = new MediaWikiApi(_baseUri);

			// Iterate through captured images and commit them to permanent storage.
			foreach (var img in api.AllImages())
			{
				if (string.IsNullOrWhiteSpace(img.Name))
				{
					// Without a name there is nothing to call the file.
					Console.Error.WriteLine("Skipping an image without a name: {0}", img.Url);
					continue;
				}

				var bytes = img.Download();

				if (bytes == null || bytes.Length == 0)
				{
					continue;
				}

				var file = new FileInfo(ToSafeFileName(img.Name));

				// WriteAllBytes replaces an existing file outright, so a shorter download
				// never leaves stale bytes from a previous run at the end of the file.
				File.WriteAllBytes(file.FullName, bytes);

				Console.WriteLine(
					"{0}, {1} bytes",
					file.FullName,
					bytes.Length);
			}
		}

		/// <summary>
		/// Replaces every character that is not allowed in a file name on the current
		/// platform with an underscore. The name comes from the remote wiki, so it must
		/// not be able to introduce directory separators or other invalid characters.
		/// </summary>
		/// <param name='name'>
		/// The image name reported by the wiki; must not be null.
		/// </param>
		/// <returns>
		/// A name of the same length that is safe to use as a single file name.
		/// </returns>
		private static string ToSafeFileName(string name)
		{
			var invalid = Path.GetInvalidFileNameChars();
			var chars = name.ToCharArray();

			for (var idx = 0; idx < chars.Length; idx++)
			{
				if (Array.IndexOf(invalid, chars[idx]) >= 0)
				{
					chars[idx] = '_';
				}
			}

			return new string(chars);
		}
	}
97
}

+ 222 - 0
MediaWikiCrawl/MediaWikiApi.cs

@ -0,0 +1,222 @@
1
//
2
// MediaWikiApi.cs
3
//
4
// Author:
5
//       Bryan Allred <bryan.allred@gmail.com>
6
//
7
// Copyright (c) 2012 Bryan Allred
8
//
9
// Permission is hereby granted, free of charge, to any person obtaining a copy
10
// of this software and associated documentation files (the "Software"), to deal
11
// in the Software without restriction, including without limitation the rights
12
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
// copies of the Software, and to permit persons to whom the Software is
14
// furnished to do so, subject to the following conditions:
15
//
16
// The above copyright notice and this permission notice shall be included in
17
// all copies or substantial portions of the Software.
18
//
19
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
// THE SOFTWARE.
26
using System;
27
using System.Collections.Generic;
28
using System.Linq;
29
using System.Threading;
30
using System.Xml.Linq;
31
32
namespace MediaWikiCrawl
33
{
34
	/// <summary>
	/// Minimal client for the <c>query</c> action of a MediaWiki <c>api.php</c> endpoint.
	/// Responses are requested as XML and fetched through <see cref="Webbie"/>.
	/// </summary>
	public class MediaWikiApi
	{
		/// <summary>
		/// Path of the API script, appended to the address when it is not already there.
		/// </summary>
		private const string ApiPath = "/api.php";

		/// <summary>
		/// Exclusive upper bound, in milliseconds, of the random pause between two
		/// consecutive image-list requests.
		/// </summary>
		private const int MaxPauseMilliseconds = 2000;

		/// <summary>
		/// Shared source of pause lengths. Access is serialised with a lock because
		/// <see cref="System.Random"/> instances are not safe for concurrent use.
		/// </summary>
		private static readonly Random Jitter = new Random();

		/// <summary>
		/// Absolute address of the wiki's <c>api.php</c>.
		/// </summary>
		private readonly Uri _baseUri;

		/// <summary>
		/// Initializes a new instance of the <see cref="MediaWikiCrawl.MediaWikiApi"/> class.
		/// </summary>
		/// <param name='baseUri'>
		/// Address of the wiki or of its <c>api.php</c>. When the address does not contain
		/// <c>/api.php</c> it is appended. The default value is only the script path and is
		/// not a usable address on its own; pass the wiki's absolute address.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <paramref name="baseUri"/> is null, empty or white space.
		/// </exception>
		public MediaWikiApi(string baseUri = "/api.php")
		{
			if (string.IsNullOrWhiteSpace(baseUri))
			{
				throw new ArgumentNullException("baseUri");
			}

			if (baseUri.IndexOf(ApiPath, StringComparison.OrdinalIgnoreCase) < 0)
			{
				// Trim first so that "http://host/" does not become "http://host//api.php".
				baseUri = baseUri.TrimEnd('/') + ApiPath;
			}

			this._baseUri = new Uri(baseUri);
		}

		/// <summary>
		/// Lazily enumerates the images of the wiki, following the continuation marker
		/// of each response until the server stops supplying one.
		/// </summary>
		/// <returns>
		/// The images, in the order the server reports them. Entries without a usable
		/// download address are left out.
		/// </returns>
		/// <param name='startsWith'>
		/// Value sent as the <c>aifrom</c> parameter of the first request.
		/// </param>
		public IEnumerable<WikiImage> AllImages(string startsWith = "0")
		{
			var continueFrom = startsWith;

			// A loop rather than recursion: a recursive iterator would wrap every result
			// page in one more enumerator and relay each image through all of them.
			while (true)
			{
				// Formulate the URI.
				var uri = new Uri(string.Format(
					"{0}?format={1}&action={2}&list={3}&aifrom={4}",
					this._baseUri.AbsoluteUri,
					Webbie.Sanitize("xml"),
					Webbie.Sanitize("query"),
					Webbie.Sanitize("allimages"),
					Webbie.Sanitize(continueFrom)));

				// Find the root of the API (it is re-used so we cache it).
				var xmlApi = this.Fetch(uri).Descendants("api").ToList();

				// Navigate the XML to formulate some images!
				var images = xmlApi
					.Descendants("query")
					.Descendants("allimages")
					.Descendants("img")
					.Select(x => new WikiImage {
						Name = (string)x.Attribute("name"),
						Url = this.ToUri(x.Attribute("url")),
						DescriptionUrl = this.ToUri(x.Attribute("descriptionurl"))
					})
					.Where(img => img.Url != null)
					.ToList();

				// Return what we can when we can.
				foreach (var img in images)
				{
					yield return img;
				}

				// Determine what to continue with; null when the server sent no marker.
				var next = xmlApi
					.Descendants("query-continue")
					.Descendants("allimages")
					.Select(x => (string)x.Attribute("aifrom"))
					.FirstOrDefault();

				// Stop when nothing is left, or when the marker did not move: asking for
				// the same position again could never make progress.
				if (string.IsNullOrWhiteSpace(next) ||
					string.Equals(next, continueFrom, StringComparison.Ordinal))
				{
					yield break;
				}

				// Give a moment to reflect.
				Pause();

				continueFrom = next;
			}
		}

		/// <summary>
		/// Builds the description of a page together with the titles of its images.
		/// </summary>
		/// <returns>
		/// The page, with <see cref="WikiPage.ImageTitles"/> filled from <see cref="PageImages"/>.
		/// </returns>
		/// <param name='title'>
		/// Title of the page.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <paramref name="title"/> is null, empty or white space.
		/// </exception>
		public WikiPage Page(string title)
		{
			if (string.IsNullOrWhiteSpace(title))
			{
				throw new ArgumentNullException("title");
			}

			return new WikiPage {
				Title = title,
				ImageTitles = this.PageImages(title)
			};
		}

		/// <summary>
		/// Looks up the image information of a file page.
		/// </summary>
		/// <returns>
		/// The first image reported for the page that has a usable download address.
		/// Only the addresses are filled in; <see cref="WikiImage.Name"/> is not set.
		/// </returns>
		/// <param name='title'>
		/// Title of the file page, for example <c>File:Example.jpg</c>.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <paramref name="title"/> is null, empty or white space.
		/// </exception>
		/// <exception cref="System.InvalidOperationException">
		/// The response contains no image with a usable address.
		/// </exception>
		public WikiImage PageImage(string title)
		{
			if (string.IsNullOrWhiteSpace(title))
			{
				throw new ArgumentNullException("title");
			}

			var uri = new Uri(string.Format(
				"{0}?format={1}&action={2}&titles={3}&prop={4}&iiprop={5}",
				this._baseUri.AbsoluteUri,
				Webbie.Sanitize("xml"),
				Webbie.Sanitize("query"),
				Webbie.Sanitize(title),
				Webbie.Sanitize("imageinfo"),
				Webbie.Sanitize("url")));

			return this.Fetch(uri)
					.Descendants("api")
					.Descendants("query")
					.Descendants("pages")
					.Descendants("page")
					.Descendants("imageinfo")
					.Descendants("ii")
					.Select(x => new WikiImage {
						Url = this.ToUri(x.Attribute("url")),
						DescriptionUrl = this.ToUri(x.Attribute("descriptionurl"))
					})
					.Where(img => img.Url != null)
					.First();
		}

		/// <summary>
		/// Lists the titles of the images used on a page.
		/// </summary>
		/// <returns>
		/// The image titles reported for the page; entries without a title are left out.
		/// The collection is empty when the response lists no images.
		/// </returns>
		/// <param name='title'>
		/// Title of the page.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <paramref name="title"/> is null, empty or white space.
		/// </exception>
		public ICollection<string> PageImages(string title)
		{
			if (string.IsNullOrWhiteSpace(title))
			{
				throw new ArgumentNullException("title");
			}

			var uri = new Uri(string.Format(
				"{0}?format={1}&action={2}&titles={3}&prop={4}",
				this._baseUri.AbsoluteUri,
				Webbie.Sanitize("xml"),
				Webbie.Sanitize("query"),
				Webbie.Sanitize(title),
				Webbie.Sanitize("images")));

			return this.Fetch(uri)
					.Descendants("api")
					.Descendants("query")
					.Descendants("pages")
					.Descendants("page")
					.Descendants("images")
					.Descendants("im")
					.Select(x => (string)x.Attribute("title"))
					.Where(imageTitle => !string.IsNullOrEmpty(imageTitle))
					.ToList();
		}

		/// <summary>
		/// Blocks the calling thread for a random time below
		/// <see cref="MaxPauseMilliseconds"/> so that consecutive requests are spaced out.
		/// </summary>
		private static void Pause()
		{
			int milliseconds;

			lock (Jitter)
			{
				milliseconds = Jitter.Next(MaxPauseMilliseconds);
			}

			// Waiting on an event that is created already signalled returns at once,
			// so an actual sleep is needed for the pause to take place.
			Thread.Sleep(milliseconds);
		}

		/// <summary>
		/// Requests the given address and parses the response body as XML.
		/// </summary>
		private XDocument Fetch(Uri uri)
		{
			return XDocument.Parse(Webbie.Get(uri));
		}

		/// <summary>
		/// Converts an address attribute into a <see cref="System.Uri"/>, resolving it
		/// against the API address when it is not absolute.
		/// </summary>
		/// <returns>
		/// The address, or null when the attribute is missing or cannot be parsed.
		/// </returns>
		private Uri ToUri(XAttribute attribute)
		{
			if (attribute == null)
			{
				return null;
			}

			Uri parsed;

			if (Uri.TryCreate(this._baseUri, attribute.Value, out parsed))
			{
				return parsed;
			}

			return null;
		}
	}
222
}

+ 47 - 0
MediaWikiCrawl/MediaWikiCrawl.csproj

@ -0,0 +1,47 @@
1
<?xml version="1.0" encoding="utf-8"?>
2
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3
  <PropertyGroup>
4
    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5
    <Platform Condition=" '$(Platform)' == '' ">x86</Platform>
6
    <ProductVersion>10.0.0</ProductVersion>
7
    <SchemaVersion>2.0</SchemaVersion>
8
    <ProjectGuid>{F06013A9-2F92-4AD7-9D2A-3545E85E9876}</ProjectGuid>
9
    <OutputType>Exe</OutputType>
10
    <RootNamespace>MediaWikiCrawl</RootNamespace>
11
    <AssemblyName>MediaWikiCrawl</AssemblyName>
12
    <StartupObject>MediaWikiCrawl.MainClass</StartupObject>
13
  </PropertyGroup>
14
  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
15
    <DebugSymbols>true</DebugSymbols>
16
    <DebugType>full</DebugType>
17
    <Optimize>false</Optimize>
18
    <OutputPath>bin\Debug</OutputPath>
19
    <DefineConstants>DEBUG;</DefineConstants>
20
    <ErrorReport>prompt</ErrorReport>
21
    <WarningLevel>4</WarningLevel>
22
    <PlatformTarget>x86</PlatformTarget>
23
    <Externalconsole>true</Externalconsole>
24
  </PropertyGroup>
25
  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
26
    <DebugType>none</DebugType>
27
    <Optimize>true</Optimize>
28
    <OutputPath>bin\Release</OutputPath>
29
    <ErrorReport>prompt</ErrorReport>
30
    <WarningLevel>4</WarningLevel>
31
    <PlatformTarget>x86</PlatformTarget>
32
    <Externalconsole>true</Externalconsole>
33
  </PropertyGroup>
34
  <ItemGroup>
35
    <Reference Include="System" />
36
    <Reference Include="System.Xml.Linq" />
37
  </ItemGroup>
38
  <ItemGroup>
39
    <Compile Include="Main.cs" />
40
    <Compile Include="AssemblyInfo.cs" />
41
    <Compile Include="MediaWikiApi.cs" />
42
    <Compile Include="Webbie.cs" />
43
    <Compile Include="WikiImage.cs" />
44
    <Compile Include="WikiPage.cs" />
45
  </ItemGroup>
46
  <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
47
</Project>

+ 156 - 0
MediaWikiCrawl/Webbie.cs

@ -0,0 +1,156 @@
1
//
2
// Webbie.cs
3
//
4
// Author:
5
//       Bryan Allred <bryan.allred@gmail.com>
6
//
7
// Copyright (c) 2012 Bryan Allred
8
//
9
// Permission is hereby granted, free of charge, to any person obtaining a copy
10
// of this software and associated documentation files (the "Software"), to deal
11
// in the Software without restriction, including without limitation the rights
12
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
// copies of the Software, and to permit persons to whom the Software is
14
// furnished to do so, subject to the following conditions:
15
//
16
// The above copyright notice and this permission notice shall be included in
17
// all copies or substantial portions of the Software.
18
//
19
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
// THE SOFTWARE.
26
using System;
27
using System.Diagnostics;
28
using System.IO;
29
using System.Net;
30
31
namespace MediaWikiCrawl
32
{
33
	/// <summary>
	/// Small HTTP helper: fetches an address as bytes or as text, and encodes values
	/// for use in a query string.
	/// </summary>
	public class Webbie
	{
		/// <summary>
		/// User agent sent with every request.
		/// </summary>
		private const string UserAgent = @"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1";

		/// <summary>
		/// Request timeout in milliseconds.
		/// </summary>
		private const int TimeoutMilliseconds = 15 * 1000;

		/// <summary>
		/// Downloads the body of the response for the given address.
		/// </summary>
		/// <returns>
		/// The response body, or null when the request fails with a
		/// <see cref="System.Net.WebException"/> or the response has no body stream.
		/// </returns>
		/// <param name='uri'>
		/// Address to download; must be an HTTP or HTTPS address.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <paramref name="uri"/> is null.
		/// </exception>
		/// <exception cref="System.ArgumentException">
		/// <paramref name="uri"/> does not produce an HTTP request.
		/// </exception>
		public static byte[] Download(Uri uri)
		{
			if (uri == null)
			{
				throw new ArgumentNullException("uri");
			}

			try
			{
				var request = CreateRequest(uri);

				// Dispose the response as well as its stream so the connection is released.
				using (var response = request.GetResponse())
				using (var body = response.GetResponseStream())
				{
					if (body != null)
					{
						using (var memoryStream = new MemoryStream())
						{
							body.CopyTo(memoryStream);
							return memoryStream.ToArray();
						}
					}
				}
			}
			catch (WebException wex)
			{
				Report(wex);
			}

			return null;
		}

		/// <summary>
		/// Fetches the body of the response for the given address as text.
		/// </summary>
		/// <returns>
		/// The response text, or an empty string when the request fails with a
		/// <see cref="System.Net.WebException"/> or the response has no body stream.
		/// </returns>
		/// <param name='uri'>
		/// Address to fetch; must be an HTTP or HTTPS address.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// <paramref name="uri"/> is null.
		/// </exception>
		/// <exception cref="System.ArgumentException">
		/// <paramref name="uri"/> does not produce an HTTP request.
		/// </exception>
		public static string Get(Uri uri)
		{
			if (uri == null)
			{
				throw new ArgumentNullException("uri");
			}

			var html = string.Empty;

			try
			{
				var request = CreateRequest(uri);

				using (var response = request.GetResponse())
				using (var body = response.GetResponseStream())
				{
					if (body != null)
					{
						using (var reader = new StreamReader(body))
						{
							html = reader.ReadToEnd();
						}
					}
				}
			}
			catch (WebException wex)
			{
				Report(wex);
			}

			return html;
		}

		/// <summary>
		/// Percent-encodes a value so that it can be placed in a query string as a
		/// single parameter value without characters such as <c>#</c>, <c>+</c> or the
		/// parameter separator changing the meaning of the address.
		/// </summary>
		/// <returns>
		/// The encoded value; null or an empty string is returned unchanged.
		/// </returns>
		/// <param name='input'>
		/// The raw, unencoded value.
		/// </param>
		public static string Sanitize(string input)
		{
			if (string.IsNullOrEmpty(input))
			{
				return input;
			}

			return Uri.EscapeDataString(input);
		}

		/// <summary>
		/// Creates the HTTP request shared by <see cref="Download"/> and <see cref="Get"/>.
		/// </summary>
		private static HttpWebRequest CreateRequest(Uri uri)
		{
			var request = WebRequest.Create(uri) as HttpWebRequest;

			if (request == null)
			{
				// Other schemes (file, ftp, ...) yield a different request type.
				throw new ArgumentException("Only HTTP and HTTPS addresses are supported.", "uri");
			}

			request.UserAgent = UserAgent;
			request.KeepAlive = false;
			request.Timeout = TimeoutMilliseconds;

			return request;
		}

		/// <summary>
		/// Writes a failed request to the debug output and releases the error response.
		/// </summary>
		private static void Report(WebException wex)
		{
			// A null resource is allowed in a using statement, so no separate null check
			// is needed before disposing the response.
			using (var response = wex.Response)
			{
				var errorResponse = response as HttpWebResponse;

				if (errorResponse != null)
				{
					Debug.WriteLine(
						"[{1}] {0}",
						errorResponse.StatusDescription,
						errorResponse.StatusCode);
				}
				else
				{
					// Timeouts and connection failures carry no response; without this
					// they would vanish without a trace.
					Debug.WriteLine(wex.Message);
				}
			}
		}
	}
156
}

+ 67 - 0
MediaWikiCrawl/WikiImage.cs

@ -0,0 +1,67 @@
1
//
2
// WikiImage.cs
3
//
4
// Author:
5
//       Bryan Allred <bryan.allred@gmail.com>
6
//
7
// Copyright (c) 2012 Bryan Allred
8
//
9
// Permission is hereby granted, free of charge, to any person obtaining a copy
10
// of this software and associated documentation files (the "Software"), to deal
11
// in the Software without restriction, including without limitation the rights
12
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
// copies of the Software, and to permit persons to whom the Software is
14
// furnished to do so, subject to the following conditions:
15
//
16
// The above copyright notice and this permission notice shall be included in
17
// all copies or substantial portions of the Software.
18
//
19
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
// THE SOFTWARE.
26
using System;
27
28
namespace MediaWikiCrawl
29
{
30
	/// <summary>
	/// An image hosted on a wiki: its name plus the addresses of the file itself
	/// and of the page describing it.
	/// </summary>
	public class WikiImage
	{
		/// <summary>
		/// Gets or sets the name of the image.
		/// </summary>
		/// <value>
		/// The image name.
		/// </value>
		public string Name { get; set; }

		/// <summary>
		/// Gets or sets the address the image file is downloaded from.
		/// </summary>
		/// <value>
		/// The address of the image file.
		/// </value>
		public Uri Url { get; set; }

		/// <summary>
		/// Gets or sets the address of the page describing the image.
		/// </summary>
		/// <value>
		/// The address of the description page.
		/// </value>
		public Uri DescriptionUrl { get; set; }

		/// <summary>
		/// Fetches the image file from <see cref="Url"/>.
		/// </summary>
		/// <returns>
		/// Whatever <see cref="Webbie.Download"/> returns for <see cref="Url"/>.
		/// </returns>
		public byte[] Download()
		{
			Uri source = this.Url;
			byte[] content = Webbie.Download(source);

			return content;
		}
	}
67
}

+ 59 - 0
MediaWikiCrawl/WikiPage.cs

@ -0,0 +1,59 @@
1
//
2
// WikiPage.cs
3
//
4
// Author:
5
//       Bryan Allred <bryan.allred@gmail.com>
6
//
7
// Copyright (c) 2012 Bryan Allred
8
//
9
// Permission is hereby granted, free of charge, to any person obtaining a copy
10
// of this software and associated documentation files (the "Software"), to deal
11
// in the Software without restriction, including without limitation the rights
12
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
// copies of the Software, and to permit persons to whom the Software is
14
// furnished to do so, subject to the following conditions:
15
//
16
// The above copyright notice and this permission notice shall be included in
17
// all copies or substantial portions of the Software.
18
//
19
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
// THE SOFTWARE.
26
using System.Collections.Generic;
27
28
namespace MediaWikiCrawl
29
{
30
	/// <summary>
	/// A page of a wiki: its title plus the titles of the images it uses.
	/// </summary>
	public class WikiPage
	{
		/// <summary>
		/// Creates a page that has no title yet and an empty list of image titles.
		/// </summary>
		public WikiPage()
		{
			ICollection<string> noImages = new List<string>();
			ImageTitles = noImages;
		}

		/// <summary>
		/// Gets or sets the title of the page.
		/// </summary>
		/// <value>
		/// The page title.
		/// </value>
		public string Title { get; set; }

		/// <summary>
		/// Gets or sets the titles of the images used on the page.
		/// </summary>
		/// <value>
		/// The image titles; an empty list on a newly created page.
		/// </value>
		public ICollection<string> ImageTitles { get; set; }
	}
59
}