This commit is contained in:
		
							
								
								
									
										110
									
								
								PROMS/EPocalipse.IFilter/ComHelper.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								PROMS/EPocalipse.IFilter/ComHelper.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,110 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Text; | ||||
| using System.Runtime.InteropServices; | ||||
|  | ||||
| namespace EPocalipse.IFilter | ||||
| { | ||||
|   /// <summary> | ||||
|   /// Managed declaration of the COM IClassFactory interface  | ||||
|   /// (IID 00000001-0000-0000-C000-000000000046). | ||||
|   /// </summary> | ||||
|   [ComVisible(false)] | ||||
|   [ComImport, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), Guid("00000001-0000-0000-C000-000000000046")] | ||||
|   internal interface IClassFactory | ||||
|   { | ||||
|     /// <summary> | ||||
|     /// Creates an object of the class this factory serves. | ||||
|     /// </summary> | ||||
|     /// <param name="pUnkOuter">Controlling unknown for aggregation, or null</param> | ||||
|     /// <param name="refiid">The interface ID requested from the new object</param> | ||||
|     /// <param name="ppunk">Receives the created object</param> | ||||
|     void CreateInstance([MarshalAs(UnmanagedType.Interface)] object pUnkOuter, ref Guid refiid, [MarshalAs(UnmanagedType.Interface)] out object ppunk); | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Locks or unlocks the class factory's server. | ||||
|     /// </summary> | ||||
|     /// <param name="fLock">true to lock, false to unlock</param> | ||||
|     //The native parameter is a 4-byte Win32 BOOL. On a COM interface a bool is marshalled as  | ||||
|     //VARIANT_BOOL unless told otherwise, so the marshalling is spelled out explicitly. | ||||
|     void LockServer([MarshalAs(UnmanagedType.Bool)] bool fLock); | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Utility class to get a Class Factory for a certain Class ID  | ||||
|   /// by loading the dll that implements that class | ||||
|   /// </summary> | ||||
|   internal static class ComHelper | ||||
|   { | ||||
|     //DllGetClassObject function pointer signature | ||||
|     private delegate int DllGetClassObject(ref Guid ClassId, ref Guid InterfaceId, [Out, MarshalAs(UnmanagedType.Interface)] out object ppunk); | ||||
|  | ||||
|     //Win32 methods used to load\unload dlls and to get a function pointer. | ||||
|     //SetLastError is enabled so that callers can query Marshal.GetLastWin32Error() after a failure. | ||||
|     private static class Win32NativeMethods | ||||
|     { | ||||
|       //GetProcAddress only exists in an ANSI flavour, so the export name is passed as ANSI | ||||
|       [DllImport("kernel32.dll", CharSet=CharSet.Ansi, ExactSpelling=true, SetLastError=true)] | ||||
|       public static extern IntPtr GetProcAddress(IntPtr hModule, string lpProcName); | ||||
|  | ||||
|       [DllImport("kernel32.dll", SetLastError=true)] | ||||
|       [return: MarshalAs(UnmanagedType.Bool)] | ||||
|       public static extern bool FreeLibrary(IntPtr hModule); | ||||
|  | ||||
|       //Unicode (LoadLibraryW) so that dll paths containing characters outside the  | ||||
|       //ANSI code page are not mangled on the way to the loader | ||||
|       [DllImport("kernel32.dll", CharSet=CharSet.Unicode, SetLastError=true)] | ||||
|       public static extern IntPtr LoadLibrary(string lpFileName); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Collects the handles of loaded dlls; the finalizer calls  | ||||
|     /// FreeLibrary on every collected handle. | ||||
|     /// </summary> | ||||
|     private class DllList | ||||
|     { | ||||
|       private List<IntPtr> _dllList=new List<IntPtr>(); | ||||
|  | ||||
|       /// <summary> | ||||
|       /// Remembers a dll handle. The list itself is used as the lock object. | ||||
|       /// </summary> | ||||
|       public void AddDllHandle(IntPtr dllHandle) | ||||
|       { | ||||
|         lock (_dllList) | ||||
|           _dllList.Add(dllHandle); | ||||
|       } | ||||
|  | ||||
|       ~DllList() | ||||
|       { | ||||
|         //Best effort: a failure while freeing one dll must not stop the others from being freed | ||||
|         for (int i=0; i<_dllList.Count; i++) | ||||
|         { | ||||
|           try | ||||
|           { | ||||
|             Win32NativeMethods.FreeLibrary(_dllList[i]); | ||||
|           } | ||||
|           catch { } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     static DllList _dllList=new DllList(); | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Returns the class factory of a COM class by loading the dll that implements it. | ||||
|     /// </summary> | ||||
|     /// <param name="dllName">The dll where the COM class is implemented</param> | ||||
|     /// <param name="filterPersistClass">The requested Class ID</param> | ||||
|     /// <returns>An IClassFactory for that class, or null when it could not be obtained</returns> | ||||
|     internal static IClassFactory GetClassFactory(string dllName, string filterPersistClass) | ||||
|     { | ||||
|       //The factory always comes straight from the dll | ||||
|       return GetClassFactoryFromDll(dllName, filterPersistClass); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Loads a dll, locates its DllGetClassObject export and asks it for the  | ||||
|     /// IClassFactory of the given class. | ||||
|     /// </summary> | ||||
|     /// <param name="dllName">The dll to load</param> | ||||
|     /// <param name="filterPersistClass">The Class ID, in a format accepted by the Guid constructor</param> | ||||
|     /// <returns>The class factory, or null if the dll or the export could not be found  | ||||
|     /// or DllGetClassObject returned a non-zero result</returns> | ||||
|     private static IClassFactory GetClassFactoryFromDll(string dllName, string filterPersistClass) | ||||
|     { | ||||
|       IntPtr module=Win32NativeMethods.LoadLibrary(dllName); | ||||
|       if (module==IntPtr.Zero) | ||||
|         return null; | ||||
|  | ||||
|       //The handle goes into the static list, so the dll stays loaded until that list is finalized | ||||
|       _dllList.AddDllHandle(module); | ||||
|  | ||||
|       IntPtr entryPoint=Win32NativeMethods.GetProcAddress(module, "DllGetClassObject"); | ||||
|       if (entryPoint==IntPtr.Zero) | ||||
|         return null; | ||||
|  | ||||
|       //Wrap the native entry point in a callable delegate | ||||
|       DllGetClassObject getClassObject= | ||||
|         (DllGetClassObject)Marshal.GetDelegateForFunctionPointer(entryPoint, typeof(DllGetClassObject)); | ||||
|  | ||||
|       Guid classId=new Guid(filterPersistClass); | ||||
|       Guid factoryIid=new Guid("00000001-0000-0000-C000-000000000046"); //IClassFactory interface id | ||||
|       Object factory; | ||||
|       int result=getClassObject(ref classId, ref factoryIid, out factory); | ||||
|  | ||||
|       //Zero means success; anything else is reported to the caller as null | ||||
|       return (result==0) ? (factory as IClassFactory) : null; | ||||
|     } | ||||
|   } | ||||
| } | ||||
							
								
								
									
										52
									
								
								PROMS/EPocalipse.IFilter/EPocalipse.IFilter.csproj
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								PROMS/EPocalipse.IFilter/EPocalipse.IFilter.csproj
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||||
|   <PropertyGroup> | ||||
|     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> | ||||
|     <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> | ||||
|     <ProductVersion>8.0.50727</ProductVersion> | ||||
|     <SchemaVersion>2.0</SchemaVersion> | ||||
|     <ProjectGuid>{400462CE-40B3-498F-B95A-B1D2AE679359}</ProjectGuid> | ||||
|     <OutputType>Library</OutputType> | ||||
|     <AppDesignerFolder>Properties</AppDesignerFolder> | ||||
|     <RootNamespace>EPocalipse.IFilter</RootNamespace> | ||||
|     <AssemblyName>EPocalipse.IFilter</AssemblyName> | ||||
|   </PropertyGroup> | ||||
|   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> | ||||
|     <DebugSymbols>true</DebugSymbols> | ||||
|     <DebugType>full</DebugType> | ||||
|     <Optimize>false</Optimize> | ||||
|     <OutputPath>bin\Debug\</OutputPath> | ||||
|     <DefineConstants>DEBUG;TRACE</DefineConstants> | ||||
|     <ErrorReport>prompt</ErrorReport> | ||||
|     <WarningLevel>4</WarningLevel> | ||||
|     <DocumentationFile> | ||||
|     </DocumentationFile> | ||||
|   </PropertyGroup> | ||||
|   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> | ||||
|     <DebugType>pdbonly</DebugType> | ||||
|     <Optimize>true</Optimize> | ||||
|     <OutputPath>bin\Release\</OutputPath> | ||||
|     <DefineConstants>TRACE</DefineConstants> | ||||
|     <ErrorReport>prompt</ErrorReport> | ||||
|     <WarningLevel>4</WarningLevel> | ||||
|   </PropertyGroup> | ||||
|   <ItemGroup> | ||||
|     <Reference Include="System" /> | ||||
|     <Reference Include="System.Data" /> | ||||
|     <Reference Include="System.Xml" /> | ||||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <Compile Include="FilterReader.cs" /> | ||||
|     <Compile Include="ComHelper.cs" /> | ||||
|     <Compile Include="FilterLoader.cs" /> | ||||
|     <Compile Include="IFilter.cs" /> | ||||
|     <Compile Include="Properties\AssemblyInfo.cs" /> | ||||
|   </ItemGroup> | ||||
|   <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> | ||||
|   <!-- To modify your build process, add your task inside one of the targets below and uncomment it.  | ||||
|        Other similar extension points exist, see Microsoft.Common.targets. | ||||
|   <Target Name="BeforeBuild"> | ||||
|   </Target> | ||||
|   <Target Name="AfterBuild"> | ||||
|   </Target> | ||||
|   --> | ||||
| </Project> | ||||
							
								
								
									
										20
									
								
								PROMS/EPocalipse.IFilter/EPocalipse.IFilter.sln
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								PROMS/EPocalipse.IFilter/EPocalipse.IFilter.sln
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | ||||
|  | ||||
| Microsoft Visual Studio Solution File, Format Version 9.00 | ||||
| # Visual Studio 2005 | ||||
| Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EPocalipse.IFilter", "EPocalipse.IFilter.csproj", "{400462CE-40B3-498F-B95A-B1D2AE679359}" | ||||
| EndProject | ||||
| Global | ||||
| 	GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||||
| 		Debug|Any CPU = Debug|Any CPU | ||||
| 		Release|Any CPU = Release|Any CPU | ||||
| 	EndGlobalSection | ||||
| 	GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||||
| 		{400462CE-40B3-498F-B95A-B1D2AE679359}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||||
| 		{400462CE-40B3-498F-B95A-B1D2AE679359}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||||
| 		{400462CE-40B3-498F-B95A-B1D2AE679359}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||||
| 		{400462CE-40B3-498F-B95A-B1D2AE679359}.Release|Any CPU.Build.0 = Release|Any CPU | ||||
| 	EndGlobalSection | ||||
| 	GlobalSection(SolutionProperties) = preSolution | ||||
| 		HideSolutionNode = FALSE | ||||
| 	EndGlobalSection | ||||
| EndGlobal | ||||
							
								
								
									
										226
									
								
								PROMS/EPocalipse.IFilter/FilterLoader.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										226
									
								
								PROMS/EPocalipse.IFilter/FilterLoader.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,226 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Text; | ||||
| using Microsoft.Win32; | ||||
| using System.IO; | ||||
| using System.Runtime.InteropServices.ComTypes; | ||||
| using System.Runtime.InteropServices; | ||||
|  | ||||
| namespace EPocalipse.IFilter | ||||
| { | ||||
|   /// <summary> | ||||
|   /// FilterLoader finds the dll and ClassID of the COM object responsible   | ||||
|   /// for filtering a specific file extension.  | ||||
|   /// It then loads that dll, creates the appropriate COM object and returns  | ||||
|   /// a pointer to an IFilter instance | ||||
|   /// </summary> | ||||
|   static class FilterLoader | ||||
|   { | ||||
|     #region CacheEntry | ||||
|     /// <summary> | ||||
|     /// The value stored in the extension cache: a dll name and a class name.  | ||||
|     /// Both may be null when nothing was found for the extension. | ||||
|     /// </summary> | ||||
|     private class CacheEntry | ||||
|     { | ||||
|       public string DllName; | ||||
|       public string ClassName; | ||||
|  | ||||
|       public CacheEntry(string dllName, string className) | ||||
|       { | ||||
|         this.DllName=dllName; | ||||
|         this.ClassName=className; | ||||
|       } | ||||
|     } | ||||
|     #endregion | ||||
|  | ||||
|     static Dictionary<string, CacheEntry> _cache=new Dictionary<string, CacheEntry>(); | ||||
|  | ||||
|     #region Registry Read String helper | ||||
|     /// <summary> | ||||
|     /// Reads the default value of a key under HKEY_LOCAL_MACHINE. | ||||
|     /// </summary> | ||||
|     /// <param name="key">The sub key path below HKEY_LOCAL_MACHINE</param> | ||||
|     /// <returns>The value as a string, or null if the key or value is missing or is not a string</returns> | ||||
|     static string ReadStrFromHKLM(string key) | ||||
|     { | ||||
|       return ReadStrFromHKLM(key,null); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Reads a named value of a key under HKEY_LOCAL_MACHINE. | ||||
|     /// </summary> | ||||
|     /// <param name="key">The sub key path below HKEY_LOCAL_MACHINE</param> | ||||
|     /// <param name="value">The value name; null reads the key's default value</param> | ||||
|     /// <returns>The value as a string, or null if the key or value is missing or is not a string</returns> | ||||
|     static string ReadStrFromHKLM(string key, string value) | ||||
|     { | ||||
|       //A using statement accepts a null resource, so no separate null branch is needed for disposal | ||||
|       using (RegistryKey rk=Registry.LocalMachine.OpenSubKey(key)) | ||||
|       { | ||||
|         if (rk==null) | ||||
|           return null; | ||||
|  | ||||
|         //"as" instead of a cast: a value of another registry type (number, binary, multi-string)  | ||||
|         //is reported as "not found" rather than raising an InvalidCastException | ||||
|         return rk.GetValue(value) as string; | ||||
|       } | ||||
|     } | ||||
|     #endregion | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Looks up the dll and Class ID registered for a file extension and  | ||||
|     /// creates an IFilter from them. | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <returns>An IFilter instance, or null when no dll and Class ID are known for the extension</returns> | ||||
|     private static IFilter LoadIFilter(string ext) | ||||
|     { | ||||
|       string dllName, filterPersistClass; | ||||
|  | ||||
|       //Nothing registered for this extension: there is nothing to load | ||||
|       if (!GetFilterDllAndClass(ext, out dllName, out filterPersistClass)) | ||||
|         return null; | ||||
|  | ||||
|       return LoadFilterFromDll(dllName, filterPersistClass); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Loads and initializes an IFilter for a file, choosing the filter by the file's own extension. | ||||
|     /// </summary> | ||||
|     /// <param name="fileName">The file to be filtered</param> | ||||
|     /// <returns>An initialized IFilter, or null if none could be loaded or initialized</returns> | ||||
|     internal static IFilter LoadAndInitIFilter(string fileName) | ||||
|     { | ||||
|       return LoadAndInitIFilter(fileName,Path.GetExtension(fileName)); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Loads the IFilter registered for an extension, points it at a file and initializes it. | ||||
|     /// </summary> | ||||
|     /// <param name="fileName">The file to be filtered</param> | ||||
|     /// <param name="extension">The extension used to select the filter</param> | ||||
|     /// <returns>An initialized IFilter, or null if no filter exists, the filter does not  | ||||
|     /// implement IPersistFile, or Init did not return S_OK</returns> | ||||
|     internal static IFilter LoadAndInitIFilter(string fileName, string extension) | ||||
|     { | ||||
|       IFilter filter=LoadIFilter(extension); | ||||
|       if (filter==null) | ||||
|         return null; | ||||
|  | ||||
|       bool initialized=false; | ||||
|       try | ||||
|       { | ||||
|         IPersistFile persistFile=(filter as IPersistFile); | ||||
|         if (persistFile!=null) | ||||
|         { | ||||
|           persistFile.Load(fileName, 0); | ||||
|           IFILTER_FLAGS flags; | ||||
|           IFILTER_INIT iflags = | ||||
|             IFILTER_INIT.CANON_HYPHENS | | ||||
|             IFILTER_INIT.CANON_PARAGRAPHS | | ||||
|             IFILTER_INIT.CANON_SPACES | | ||||
|             IFILTER_INIT.APPLY_INDEX_ATTRIBUTES | | ||||
|             IFILTER_INIT.HARD_LINE_BREAKS | | ||||
|             IFILTER_INIT.FILTER_OWNED_VALUE_OK; | ||||
|  | ||||
|           initialized=(filter.Init(iflags, 0, IntPtr.Zero, out flags)==IFilterReturnCode.S_OK); | ||||
|         } | ||||
|       } | ||||
|       finally | ||||
|       { | ||||
|         //Release the filter whenever it is not handed to the caller: no IPersistFile,  | ||||
|         //a failed Init, or an exception thrown by Load/Init (which still propagates). | ||||
|         if (!initialized) | ||||
|           Marshal.ReleaseComObject(filter); | ||||
|       } | ||||
|       return initialized ? filter : null; | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Creates an IFilter instance through the class factory exposed by a dll. | ||||
|     /// </summary> | ||||
|     /// <param name="dllName">The dll implementing the filter class</param> | ||||
|     /// <param name="filterPersistClass">The Class ID of the filter class</param> | ||||
|     /// <returns>The created object as IFilter, or null when no class factory is available  | ||||
|     /// or the created object is not an IFilter</returns> | ||||
|     private static IFilter LoadFilterFromDll(string dllName, string filterPersistClass) | ||||
|     { | ||||
|       IClassFactory factory=ComHelper.GetClassFactory(dllName, filterPersistClass); | ||||
|       if (factory!=null) | ||||
|       { | ||||
|         //Ask the factory for an object exposing the IFilter interface | ||||
|         Guid filterIid=new Guid("89BCB740-6119-101A-BCB7-00DD010655AF"); | ||||
|         Object instance; | ||||
|         factory.CreateInstance(null, ref filterIid, out instance); | ||||
|         return (instance as IFilter); | ||||
|       } | ||||
|       return null; | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Finds the dll and Class ID of the filter for an extension, consulting the cache first  | ||||
|     /// and the registry otherwise. Registry results are cached, including "nothing found". | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <param name="dllName">Receives the dll name, or null</param> | ||||
|     /// <param name="filterPersistClass">Receives the Class ID, or null</param> | ||||
|     /// <returns>true when both the dll name and the Class ID are known</returns> | ||||
|     private static bool GetFilterDllAndClass(string ext, out string dllName, out string filterPersistClass) | ||||
|     { | ||||
|       bool cached=GetFilterDllAndClassFromCache(ext, out dllName, out filterPersistClass); | ||||
|       if (!cached) | ||||
|       { | ||||
|         string handlerClass=GetPersistentHandlerClass(ext,true); | ||||
|         if (handlerClass!=null) | ||||
|           GetFilterDllAndClassFromPersistentHandler(handlerClass, out dllName, out filterPersistClass); | ||||
|  | ||||
|         //Stored even when the values are null, so the registry is not searched again for this extension | ||||
|         AddExtensionToCache(ext, dllName, filterPersistClass); | ||||
|       } | ||||
|       return !(dllName==null || filterPersistClass==null); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Stores the lookup result for an extension in the cache, keyed by the lower-cased extension. | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <param name="dllName">The dll name found for the extension (may be null)</param> | ||||
|     /// <param name="filterPersistClass">The Class ID found for the extension (may be null)</param> | ||||
|     private static void AddExtensionToCache(string ext, string dllName, string filterPersistClass) | ||||
|     { | ||||
|       lock (_cache) | ||||
|       { | ||||
|         //The cache lookup and this insert are not one atomic step, so two threads can both miss  | ||||
|         //and both arrive here for the same extension. The indexer overwrites the entry, whereas  | ||||
|         //Add would throw an ArgumentException for the second thread. | ||||
|         _cache[ext.ToLower()]=new CacheEntry(dllName, filterPersistClass); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Reads from the registry the filter Class ID registered under a persistent handler  | ||||
|     /// and the dll registered as that class's InprocServer32. | ||||
|     /// </summary> | ||||
|     /// <param name="persistentHandlerClass">The Class ID of the persistent handler</param> | ||||
|     /// <param name="dllName">Receives the dll name, or null</param> | ||||
|     /// <param name="filterPersistClass">Receives the filter Class ID, or null/empty when not registered</param> | ||||
|     /// <returns>true when a non-empty dll name was found</returns> | ||||
|     private static bool GetFilterDllAndClassFromPersistentHandler(string persistentHandlerClass, out string dllName, out string filterPersistClass) | ||||
|     { | ||||
|       //The IFilter interface ID names the sub key that holds the filter's Class ID | ||||
|       string addinKey=@"Software\Classes\CLSID\" + persistentHandlerClass +  | ||||
|         @"\PersistentAddinsRegistered\{89BCB740-6119-101A-BCB7-00DD010655AF}"; | ||||
|       filterPersistClass=ReadStrFromHKLM(addinKey); | ||||
|       if (String.IsNullOrEmpty(filterPersistClass)) | ||||
|       { | ||||
|         dllName=null; | ||||
|         return false; | ||||
|       } | ||||
|  | ||||
|       //The default value of InprocServer32 is the dll name | ||||
|       string serverKey=@"Software\Classes\CLSID\" + filterPersistClass + @"\InprocServer32"; | ||||
|       dllName=ReadStrFromHKLM(serverKey); | ||||
|       return (!String.IsNullOrEmpty(dllName)); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Finds the persistent handler Class ID for an extension. Tries the extension itself,  | ||||
|     /// then its document type, and (only if requested) its content type. | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <param name="searchContentType">Whether the content type may be used as a last resort</param> | ||||
|     /// <returns>The persistent handler Class ID, or null/empty when none was found</returns> | ||||
|     private static string GetPersistentHandlerClass(string ext, bool searchContentType) | ||||
|     { | ||||
|       //First choice: a handler registered directly on the extension | ||||
|       string handler=GetPersistentHandlerClassFromExtension(ext); | ||||
|  | ||||
|       //Second choice: the handler of the extension's document type | ||||
|       if (String.IsNullOrEmpty(handler)) | ||||
|       { | ||||
|         handler=GetPersistentHandlerClassFromDocumentType(ext); | ||||
|       } | ||||
|  | ||||
|       //Last resort, only when the caller allows it: go through the content type | ||||
|       if (searchContentType) | ||||
|       { | ||||
|         if (String.IsNullOrEmpty(handler)) | ||||
|           handler=GetPersistentHandlerClassFromContentType(ext); | ||||
|       } | ||||
|       return handler; | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Finds the persistent handler through the extension's "Content Type": reads the extension  | ||||
|     /// registered for that content type and repeats the handler lookup with it. | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <returns>The persistent handler Class ID, or null when the content type leads nowhere</returns> | ||||
|     private static string GetPersistentHandlerClassFromContentType(string ext) | ||||
|     { | ||||
|       string contentType=ReadStrFromHKLM(@"Software\Classes\"+ext,"Content Type"); | ||||
|       if (String.IsNullOrEmpty(contentType)) | ||||
|         return null; | ||||
|  | ||||
|       string contentTypeExtension=ReadStrFromHKLM(@"Software\Classes\MIME\Database\Content Type\"+contentType, | ||||
|           "Extension"); | ||||
|  | ||||
|       //A content type without a registered extension gives nothing to retry with. Without this  | ||||
|       //check the retry would run with a null extension and probe the "Software\Classes\" root itself. | ||||
|       if (String.IsNullOrEmpty(contentTypeExtension)) | ||||
|         return null; | ||||
|  | ||||
|       //Extensions are registry identifiers, so they are compared ordinally rather than by culture | ||||
|       if (ext.Equals(contentTypeExtension, StringComparison.OrdinalIgnoreCase)) | ||||
|         return null; //No need to look further. This extension does not have any persistent handler | ||||
|  | ||||
|       //We know the extension that is associated with that content type. Simply try again with the new extension | ||||
|       return GetPersistentHandlerClass(contentTypeExtension, false); //Don't search content type this time. | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Finds the persistent handler through the extension's document type:  | ||||
|     /// extension -> document type -> Class ID -> PersistentHandler. | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <returns>The persistent handler Class ID, or null when any step of the chain is missing</returns> | ||||
|     private static string GetPersistentHandlerClassFromDocumentType(string ext) | ||||
|     { | ||||
|       //Get the DocumentType of this file extension | ||||
|       string docType=ReadStrFromHKLM(@"Software\Classes\"+ext); | ||||
|       if (String.IsNullOrEmpty(docType)) | ||||
|         return null; | ||||
|  | ||||
|       //Get the Class ID for this document type | ||||
|       string docClass=ReadStrFromHKLM(@"Software\Classes\" + docType + @"\CLSID"); | ||||
|       //Check the value just read (docClass). Testing docType again would let a missing  | ||||
|       //Class ID through and read "Software\Classes\CLSID\\PersistentHandler". | ||||
|       if (String.IsNullOrEmpty(docClass)) | ||||
|         return null; | ||||
|  | ||||
|       //Now get the PersistentHandler for that Class ID | ||||
|       return ReadStrFromHKLM(@"Software\Classes\CLSID\" + docClass + @"\PersistentHandler"); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Reads the PersistentHandler registered directly under an extension's key. | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <returns>The persistent handler Class ID, or null when the key or value is missing</returns> | ||||
|     private static string GetPersistentHandlerClassFromExtension(string ext) | ||||
|     { | ||||
|       string handlerKey=@"Software\Classes\"+ext+@"\PersistentHandler"; | ||||
|       return ReadStrFromHKLM(handlerKey); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Looks up the cached dll name and Class ID for an extension (lower-cased before the lookup). | ||||
|     /// </summary> | ||||
|     /// <param name="ext">The extension of the file</param> | ||||
|     /// <param name="dllName">Receives the cached dll name; null when there is no entry</param> | ||||
|     /// <param name="filterPersistClass">Receives the cached Class ID; null when there is no entry</param> | ||||
|     /// <returns>true when the cache holds an entry for the extension</returns> | ||||
|     private static bool GetFilterDllAndClassFromCache(string ext, out string dllName, out string filterPersistClass) | ||||
|     { | ||||
|       string cacheKey=ext.ToLower(); | ||||
|       CacheEntry entry; | ||||
|       bool found; | ||||
|  | ||||
|       //Only the dictionary access itself needs the lock | ||||
|       lock (_cache) | ||||
|       { | ||||
|         found=_cache.TryGetValue(cacheKey, out entry); | ||||
|       } | ||||
|  | ||||
|       if (found) | ||||
|       { | ||||
|         dllName=entry.DllName; | ||||
|         filterPersistClass=entry.ClassName; | ||||
|       } | ||||
|       else | ||||
|       { | ||||
|         dllName=null; | ||||
|         filterPersistClass=null; | ||||
|       } | ||||
|       return found; | ||||
|     } | ||||
|   } | ||||
| } | ||||
							
								
								
									
										112
									
								
								PROMS/EPocalipse.IFilter/FilterReader.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										112
									
								
								PROMS/EPocalipse.IFilter/FilterReader.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,112 @@ | ||||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Text; | ||||
| using System.IO; | ||||
| using System.Runtime.InteropServices; | ||||
|  | ||||
| namespace EPocalipse.IFilter | ||||
| { | ||||
|   /// <summary> | ||||
|   /// Implements a TextReader that reads from an IFilter.  | ||||
|   /// </summary> | ||||
|   public class FilterReader : TextReader | ||||
|   { | ||||
|     IFilter _filter; | ||||
|     private bool _done; | ||||
|     private STAT_CHUNK _currentChunk; | ||||
|     private bool _currentChunkValid; | ||||
|     private char[] _charsLeftFromLastRead; | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Releases the underlying filter. Calling it more than once is harmless. | ||||
|     /// </summary> | ||||
|     public override void Close() | ||||
|     { | ||||
|       Dispose(true); | ||||
|       GC.SuppressFinalize(this); | ||||
|     } | ||||
|  | ||||
|     ~FilterReader() | ||||
|     { | ||||
|       Dispose(false); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Releases the filter COM object held by this reader. | ||||
|     /// </summary> | ||||
|     /// <param name="disposing">true when called from Close/Dispose, false when called from the finalizer</param> | ||||
|     protected override void Dispose(bool disposing) | ||||
|     { | ||||
|       //Clear the field before releasing, so that a second call (e.g. Close() followed by  | ||||
|       //Dispose()) finds nothing to release instead of releasing the same COM object twice. | ||||
|       IFilter filter=_filter; | ||||
|       _filter=null; | ||||
|       if (filter!=null) | ||||
|         Marshal.ReleaseComObject(filter); | ||||
|  | ||||
|       base.Dispose(disposing); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Reads up to <paramref name="count"/> characters of filtered text into  | ||||
|     /// <paramref name="array"/>, starting at <paramref name="offset"/>.  | ||||
|     /// A space is appended after every piece of text obtained from the filter.  | ||||
|     /// Text read ahead from the filter but not fitting into the request is kept for the next call. | ||||
|     /// </summary> | ||||
|     /// <param name="array">The destination buffer</param> | ||||
|     /// <param name="offset">The index in the destination at which to start writing</param> | ||||
|     /// <param name="count">The maximum number of characters to read</param> | ||||
|     /// <returns>The number of characters written; less than count once the filter has no more chunks</returns> | ||||
|     /// <exception cref="ObjectDisposedException">The reader no longer holds a filter</exception> | ||||
|     public override int Read(char[] array, int offset, int count) | ||||
|     { | ||||
|       if (_filter==null) | ||||
|         throw new ObjectDisposedException("FilterReader"); | ||||
|  | ||||
|       int endOfChunksCount=0; | ||||
|       int charsRead=0; | ||||
|  | ||||
|       while (!_done && charsRead<count) | ||||
|       { | ||||
|         //Serve characters left over from a previous read-ahead first | ||||
|         if (_charsLeftFromLastRead!=null) | ||||
|         { | ||||
|           int charsToCopy=Math.Min(_charsLeftFromLastRead.Length, count-charsRead); | ||||
|           Array.Copy(_charsLeftFromLastRead,0, array, offset+charsRead, charsToCopy); | ||||
|           charsRead+=charsToCopy; | ||||
|           if (charsToCopy<_charsLeftFromLastRead.Length) | ||||
|           { | ||||
|             char[] tmp=new char[_charsLeftFromLastRead.Length-charsToCopy]; | ||||
|             Array.Copy(_charsLeftFromLastRead, charsToCopy, tmp, 0, tmp.Length); | ||||
|             _charsLeftFromLastRead=tmp; | ||||
|           } | ||||
|           else | ||||
|             _charsLeftFromLastRead=null; | ||||
|           continue; | ||||
|         } | ||||
|  | ||||
|         //Move on to the next chunk; only text chunks are read | ||||
|         if (!_currentChunkValid) | ||||
|         { | ||||
|           IFilterReturnCode res=_filter.GetChunk(out _currentChunk); | ||||
|           _currentChunkValid=(res==IFilterReturnCode.S_OK) && ((_currentChunk.flags & CHUNKSTATE.CHUNK_TEXT)!=0); | ||||
|  | ||||
|           if (res==IFilterReturnCode.FILTER_E_END_OF_CHUNKS) | ||||
|             endOfChunksCount++; | ||||
|  | ||||
|           if (endOfChunksCount>1) | ||||
|             _done=true; //That's it. no more chunks available | ||||
|         } | ||||
|  | ||||
|         if (_currentChunkValid) | ||||
|         { | ||||
|           uint bufLength=(uint)(count-charsRead); | ||||
|           if (bufLength<8192) | ||||
|             bufLength=8192; //Read ahead | ||||
|  | ||||
|           //One slot more than the filter is allowed to fill, so the separator space  | ||||
|           //always fits even when the filter uses the whole capacity. | ||||
|           uint capacity=bufLength; | ||||
|           char[] buffer=new char[capacity+1]; | ||||
|           IFilterReturnCode res=_filter.GetText(ref bufLength, buffer); | ||||
|           if (res==IFilterReturnCode.S_OK || res==IFilterReturnCode.FILTER_S_LAST_TEXT) | ||||
|           { | ||||
|             //Never trust a reported length beyond what was offered | ||||
|             if (bufLength>capacity) | ||||
|               bufLength=capacity; | ||||
|  | ||||
|             //Separate this piece of text from the next one. Done only on success:  | ||||
|             //after a failure the reported length is not meaningful. | ||||
|             buffer[bufLength++] = ' '; | ||||
|  | ||||
|             int cRead=(int)bufLength; | ||||
|             if (cRead+charsRead>count) | ||||
|             { | ||||
|               //More text than requested: keep the surplus for the next call | ||||
|               int charsLeft=(cRead+charsRead-count); | ||||
|               _charsLeftFromLastRead=new char[charsLeft]; | ||||
|               Array.Copy(buffer, cRead-charsLeft, _charsLeftFromLastRead, 0, charsLeft); | ||||
|               cRead-=charsLeft; | ||||
|             } | ||||
|             else | ||||
|               _charsLeftFromLastRead=null; | ||||
|  | ||||
|             Array.Copy(buffer, 0, array, offset+charsRead, cRead); | ||||
|             charsRead+=cRead; | ||||
|           } | ||||
|  | ||||
|           //Anything but S_OK (last text, no more text, or an error) ends this chunk.  | ||||
|           //Leaving the chunk valid after an error would call GetText again forever. | ||||
|           if (res!=IFilterReturnCode.S_OK) | ||||
|             _currentChunkValid=false; | ||||
|         } | ||||
|       } | ||||
|       return charsRead; | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Creates a reader over the text of a file, using the filter loaded for that file. | ||||
|     /// </summary> | ||||
|     /// <param name="fileName">The file to read</param> | ||||
|     /// <exception cref="ArgumentException">No filter could be loaded and initialized for the file</exception> | ||||
|     public FilterReader(string fileName) | ||||
|     { | ||||
|       IFilter loaded=FilterLoader.LoadAndInitIFilter(fileName); | ||||
|       if (loaded==null) | ||||
|         throw new ArgumentException("no filter defined for "+fileName); | ||||
|  | ||||
|       _filter=loaded; | ||||
|     } | ||||
|   } | ||||
| } | ||||
							
								
								
									
										435
									
								
								PROMS/EPocalipse.IFilter/IFilter.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										435
									
								
								PROMS/EPocalipse.IFilter/IFilter.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,435 @@ | ||||
| using System; | ||||
| using System.Text; | ||||
| using System.Runtime.InteropServices; | ||||
|  | ||||
| //Contains IFilter interface translation | ||||
| //Most translations are from PInvoke.net | ||||
|  | ||||
| namespace EPocalipse.IFilter | ||||
| { | ||||
|   /// <summary> | ||||
|   /// Pairs a property set GUID with a PROPSPEC. Laid out sequentially for interop. | ||||
|   /// </summary> | ||||
|   [StructLayout(LayoutKind.Sequential)] | ||||
|   public struct FULLPROPSPEC  | ||||
|   { | ||||
|     public Guid guidPropSet; | ||||
|     public PROPSPEC psProperty; | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Three sequential 32-bit integers describing a region. | ||||
|   /// NOTE(review): presumably a chunk id plus a start position and an extent  | ||||
|   /// counted in characters, as the field names suggest - confirm against the native definition. | ||||
|   /// </summary> | ||||
|   [StructLayout(LayoutKind.Sequential)] | ||||
|   internal struct FILTERREGION  | ||||
|   { | ||||
|     public int idChunk; | ||||
|     public int cwcStart; | ||||
|     public int cwcExtent; | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// A kind field followed by a union: propid and lpwstr share the same offset. | ||||
|   /// NOTE(review): offset 4 for the union presumably matches the native layout only in  | ||||
|   /// 32-bit processes; with 8-byte pointers the native union would be expected at  | ||||
|   /// offset 8 - confirm before reading these fields in a 64-bit process. | ||||
|   /// </summary> | ||||
|   [StructLayout(LayoutKind.Explicit)] | ||||
|   public struct PROPSPEC | ||||
|   { | ||||
|     [FieldOffset(0)] public int ulKind;     // 0 - string used; 1 - PROPID | ||||
|     [FieldOffset(4)] public int propid;     | ||||
|     [FieldOffset(4)] public IntPtr lpwstr; | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Flags received from the filter through the out parameter of IFilter.Init. | ||||
|   /// </summary> | ||||
|   [Flags] | ||||
|   internal enum IFILTER_FLAGS  | ||||
|   { | ||||
|     /// <summary> | ||||
|     /// The caller should use the IPropertySetStorage and IPropertyStorage | ||||
|     /// interfaces to locate additional properties.  | ||||
|     /// When this flag is set, properties available through COM | ||||
|     /// enumerators should not be returned from IFilter.  | ||||
|     /// </summary> | ||||
|     IFILTER_FLAGS_OLE_PROPERTIES = 1 | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Flags controlling the operation of the FileFilter | ||||
|   /// instance. They are combined and passed to IFilter.Init. | ||||
|   /// </summary> | ||||
|   [Flags] | ||||
|   internal enum IFILTER_INIT | ||||
|   { | ||||
|     NONE = 0, | ||||
|     /// <summary> | ||||
|     /// Paragraph breaks should be marked with the Unicode PARAGRAPH | ||||
|     /// SEPARATOR (0x2029) | ||||
|     /// </summary> | ||||
|     CANON_PARAGRAPHS = 1, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Soft returns, such as the newline character in Microsoft Word, should | ||||
|     /// be replaced by hard returns - LINE SEPARATOR (0x2028). Existing hard | ||||
|     /// returns can be doubled. A carriage return (0x000D), line feed (0x000A), | ||||
|     /// or the carriage return and line feed in combination should be considered | ||||
|     /// a hard return. The intent is to enable pattern-expression matches that | ||||
|     /// match against observed line breaks.  | ||||
|     /// </summary> | ||||
|     HARD_LINE_BREAKS = 2, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Various word-processing programs have forms of hyphens that are not | ||||
|     /// represented in the host character set, such as optional hyphens | ||||
|     /// (appearing only at the end of a line) and nonbreaking hyphens. This flag | ||||
|     /// indicates that optional hyphens are to be converted to nulls, and | ||||
|     /// non-breaking hyphens are to be converted to normal hyphens (0x2010), or | ||||
|     /// HYPHEN-MINUSES (0x002D).  | ||||
|     /// </summary> | ||||
|     CANON_HYPHENS = 4, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Just as the CANON_HYPHENS flag standardizes hyphens, | ||||
|     /// this one standardizes spaces. All special space characters, such as | ||||
|     /// nonbreaking spaces, are converted to the standard space character | ||||
|     /// (0x0020).  | ||||
|     /// </summary> | ||||
|     CANON_SPACES = 8, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Indicates that the client wants text split into chunks representing | ||||
|     /// public value-type properties.  | ||||
|     /// </summary> | ||||
|     APPLY_INDEX_ATTRIBUTES = 16, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Indicates that the client wants text split into chunks representing | ||||
|     /// properties determined during the indexing process.  | ||||
|     /// </summary> | ||||
|     APPLY_CRAWL_ATTRIBUTES = 256, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Any properties not covered by the APPLY_INDEX_ATTRIBUTES | ||||
|     /// and APPLY_CRAWL_ATTRIBUTES flags should be emitted.  | ||||
|     /// </summary> | ||||
|     APPLY_OTHER_ATTRIBUTES = 32, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Optimizes IFilter for indexing because the client calls the | ||||
|     /// IFilter::Init method only once and does not call IFilter::BindRegion. | ||||
|     /// This eliminates the possibility of accessing a chunk both before and | ||||
|     /// after accessing another chunk.  | ||||
|     /// </summary> | ||||
|     INDEXING_ONLY = 64, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// The text extraction process must recursively search all linked | ||||
|     /// objects within the document. If a link is unavailable, the | ||||
|     /// IFilter::GetChunk call that would have obtained the first chunk of the | ||||
|     /// link should return FILTER_E_LINK_UNAVAILABLE.  | ||||
|     /// </summary> | ||||
|     SEARCH_LINKS = 128, | ||||
|  | ||||
|     /// <summary> | ||||
|     /// The content indexing process can return property values set by the filter.  | ||||
|     /// </summary> | ||||
|     FILTER_OWNED_VALUE_OK = 512 | ||||
|   } | ||||
|  | ||||
/// <summary>
/// Descriptor of a single chunk, filled in by IFilter.GetChunk through its
/// out parameter. The fields are laid out sequentially and their order
/// must not be changed, because the structure is marshaled across the COM
/// boundary.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct STAT_CHUNK
{
  /// <summary>
  /// Identifier of this chunk. Identifiers are unique within the current
  /// IFilter instance and are handed out in ascending order, matching the
  /// order in which the chunks appear in the source document. Search
  /// engines that exploit the proximity of chunks with different
  /// properties rely on this emission order.
  /// </summary>
  public int idChunk;

  /// <summary>
  /// Kind of break separating the previous chunk from this one
  /// (a CHUNK_BREAKTYPE value).
  /// </summary>
  [MarshalAs(UnmanagedType.U4)]
  public CHUNK_BREAKTYPE breakType;

  /// <summary>
  /// CHUNKSTATE flags telling whether the chunk carries a text-type or a
  /// value-type property. With CHUNK_TEXT set, read the content with
  /// IFilter::GetText as a series of words; with CHUNK_VALUE set, read it
  /// with IFilter::GetValue and treat it as one property value. Content
  /// that must be seen both ways is emitted twice, as two separate chunks
  /// with one flag each.
  /// </summary>
  [MarshalAs(UnmanagedType.U4)]
  public CHUNKSTATE flags;

  /// <summary>
  /// Language and sublanguage of the chunk text, used by document indexers
  /// for word breaking. Ignored unless the chunk is text-type or a
  /// value-type whose data type is VT_LPWSTR, VT_LPSTR or VT_BSTR.
  /// </summary>
  public int locale;

  /// <summary>
  /// Property that applies to the chunk. A filter that needs the same text
  /// under several properties emits that text once per property, each
  /// time in its own chunk.
  /// </summary>
  public FULLPROPSPEC attribute;

  /// <summary>
  /// Identifier of the chunk this one originates from:
  /// for a text-type property it equals idChunk;
  /// for a value-type property derived from textual content it is the ID of
  /// the text-type chunk it was derived from;
  /// when the filter attributes request value-type properties only, there
  /// is no content chunk to derive from and the member is zero, which is
  /// not a valid chunk ID.
  /// </summary>
  public int idChunkSource;

  /// <summary>
  /// Offset within the source chunk at which the source text of a derived
  /// chunk begins.
  /// </summary>
  public int cwcStartSource;

  /// <summary>
  /// Length, in characters, of the source text this chunk was derived
  /// from. Zero means the source and derived text correspond character by
  /// character; a nonzero value means there is no such direct
  /// correspondence.
  /// </summary>
  public int cwcLenSource;
}
|  | ||||
/// <summary>
/// Kinds of break that can separate the current chunk from the previous
/// chunk returned by a filter. Stored in STAT_CHUNK.breakType.
/// </summary>
public enum CHUNK_BREAKTYPE
{
  /// <summary>
  /// No break: the current chunk is glued directly onto the previous one.
  /// </summary>
  CHUNK_NO_BREAK = 0,

  /// <summary>
  /// Word break between this chunk and the previous chunk with the same
  /// attribute. Should be used sparingly: word breaking is
  /// language-dependent and is best left to the search engine.
  /// </summary>
  CHUNK_EOW = 1,

  /// <summary>
  /// Sentence break between this chunk and the previous chunk with the
  /// same attribute.
  /// </summary>
  CHUNK_EOS = 2,

  /// <summary>
  /// Paragraph break between this chunk and the previous chunk with the
  /// same attribute.
  /// </summary>
  CHUNK_EOP = 3,

  /// <summary>
  /// Chapter break between this chunk and the previous chunk with the
  /// same attribute.
  /// </summary>
  CHUNK_EOC = 4
}
|  | ||||
|  | ||||
/// <summary>
/// Bit flags describing what kind of property a chunk carries. Stored in
/// STAT_CHUNK.flags. Marked as a flags enum so that combined values are
/// formatted and parsed by member name rather than as a bare number.
/// </summary>
[System.Flags]
public enum CHUNKSTATE
{
  /// <summary>
  /// The current chunk is a text-type property.
  /// </summary>
  CHUNK_TEXT = 0x1,

  /// <summary>
  /// The current chunk is a value-type property.
  /// </summary>
  CHUNK_VALUE = 0x2,

  /// <summary>
  /// Reserved.
  /// </summary>
  CHUNK_FILTER_OWNED_VALUE = 0x4
}
|  | ||||
/// <summary>
/// HRESULT-style status codes returned by the IFilter methods. Values with
/// the high bit set (0x8…) are failures; the FILTER_S_* and FILTER_W_*
/// values are success or warning codes.
/// </summary>
internal enum IFilterReturnCode : uint
{
  /// <summary>
  /// Success
  /// </summary>
  S_OK = 0,
  /// <summary>
  /// The function was denied access to the filter file.
  /// </summary>
  E_ACCESSDENIED = 0x80070005,
  /// <summary>
  /// The function encountered an invalid handle,
  /// probably due to a low-memory situation.
  /// </summary>
  E_HANDLE = 0x80070006,
  /// <summary>
  /// The function received an invalid parameter.
  /// </summary>
  E_INVALIDARG = 0x80070057,
  /// <summary>
  /// Out of memory
  /// </summary>
  E_OUTOFMEMORY = 0x8007000E,
  /// <summary>
  /// Not implemented
  /// </summary>
  E_NOTIMPL = 0x80004001,
  /// <summary>
  /// Unspecified failure. The Win32 value is 0x80004005; 0x80000008 is the
  /// legacy 16-bit definition and is never what a Win32 filter returns.
  /// </summary>
  E_FAIL = 0x80004005,
  /// <summary>
  /// File not filtered due to password protection
  /// </summary>
  FILTER_E_PASSWORD = 0x8004170B,
  /// <summary>
  /// The document format is not recognised by the filter
  /// </summary>
  FILTER_E_UNKNOWNFORMAT = 0x8004170C,
  /// <summary>
  /// No text in current chunk
  /// </summary>
  FILTER_E_NO_TEXT = 0x80041705,
  /// <summary>
  /// No more chunks of text available in object
  /// </summary>
  FILTER_E_END_OF_CHUNKS = 0x80041700,
  /// <summary>
  /// No more text available in chunk
  /// </summary>
  FILTER_E_NO_MORE_TEXT = 0x80041701,
  /// <summary>
  /// No more property values available in chunk
  /// </summary>
  FILTER_E_NO_MORE_VALUES = 0x80041702,
  /// <summary>
  /// Unable to access object
  /// </summary>
  FILTER_E_ACCESS = 0x80041703,
  /// <summary>
  /// Moniker doesn't cover entire region
  /// </summary>
  FILTER_W_MONIKER_CLIPPED = 0x00041704,
  /// <summary>
  /// Unable to bind IFilter for embedded object
  /// </summary>
  FILTER_E_EMBEDDING_UNAVAILABLE = 0x80041707,
  /// <summary>
  /// Unable to bind IFilter for linked object
  /// </summary>
  FILTER_E_LINK_UNAVAILABLE = 0x80041708,
  /// <summary>
  /// This is the last text in the current chunk
  /// </summary>
  FILTER_S_LAST_TEXT = 0x00041709,
  /// <summary>
  /// This is the last value in the current chunk
  /// </summary>
  FILTER_S_LAST_VALUES = 0x0004170A
}
|  | ||||
/// <summary>
/// Managed declaration of the COM IFilter interface
/// (IID 89BCB740-6119-101A-BCB7-00DD010655AF, IUnknown-based).
/// All methods are marked PreserveSig, so they hand back their status code
/// as the return value instead of raising an exception. The members must
/// stay in this order: COM interop binds them to the native interface by
/// position.
/// </summary>
[ComImport]
[Guid("89BCB740-6119-101A-BCB7-00DD010655AF")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
internal interface IFilter
{
  /// <summary>
  /// Starts a filtering session.
  /// </summary>
  /// <param name="grfFlags">
  /// [in] IFILTER_INIT flags controlling text standardization, property
  /// output, embedding scope and IFilter access patterns.
  /// </param>
  /// <param name="cAttributes">
  /// [in] Number of entries in the attributes array. A nonzero value takes
  /// precedence over attributes requested through grfFlags. When no
  /// attribute flags are given and this is zero, the default is the
  /// PSGUID_STORAGE storage property set (last-write date and time, size
  /// and so on) plus the PID_STG_CONTENTS 'contents' property, which maps
  /// to the main contents of the file.
  /// </param>
  /// <param name="aAttributes">
  /// [in] Array of pointers to FULLPROPSPEC structures naming the requested
  /// properties. When cAttributes is nonzero, only these properties are
  /// returned.
  /// </param>
  /// <param name="pdwFlags">
  /// [out] IFILTER_FLAGS value describing additional properties available
  /// to the caller.
  /// </param>
  [PreserveSig]
  IFilterReturnCode Init(
    IFILTER_INIT grfFlags,
    int cAttributes,
    IntPtr aAttributes,
    out IFILTER_FLAGS pdwFlags);

  /// <summary>
  /// Moves the filter to the start of the next chunk (or of the first
  /// chunk on the first call) and returns a description of that chunk.
  /// </summary>
  /// <param name="pStat">[out] Description of the current chunk.</param>
  [PreserveSig]
  IFilterReturnCode GetChunk(out STAT_CHUNK pStat);

  /// <summary>
  /// Reads text (text-type properties) from the current chunk, whose
  /// CHUNKSTATE must be CHUNK_TEXT.
  /// </summary>
  /// <param name="pcwcBuffer">
  /// [in/out] On entry, the capacity of awcBuffer in wide (Unicode)
  /// characters; on exit, the number of Unicode characters written to it.
  /// This is a character count, not a byte count.
  /// </param>
  /// <param name="awcBuffer">
  /// Receives the text of the current chunk. The buffer is not terminated
  /// with a character; use the count returned in pcwcBuffer.
  /// </param>
  [PreserveSig]
  IFilterReturnCode GetText(
    ref uint pcwcBuffer,
    [Out, MarshalAs(UnmanagedType.LPArray)] char[] awcBuffer);

  /// <summary>
  /// Reads a value (value-type property) from the current chunk, whose
  /// CHUNKSTATE must be CHUNK_VALUE.
  /// </summary>
  /// <param name="PropVal">
  /// Pointer to the PROPVARIANT structure, which is allocated with
  /// CoTaskMemAlloc. Some PROPVARIANT structures contain pointers that are
  /// released by PropVariantClear; calling PropVariantClear is the
  /// responsibility of the GetValue caller.
  /// </param>
  [PreserveSig]
  int GetValue(ref IntPtr PropVal);

  /// <summary>
  /// Retrieves an interface representing the specified portion of the
  /// object. Currently reserved for future use.
  /// </summary>
  /// <param name="origPos">Region of the object to bind to.</param>
  /// <param name="riid">Identifier of the requested interface.</param>
  /// <param name="ppunk">Receives the requested interface.</param>
  // NOTE(review): the native declaration appears to take origPos by value
  // and ppunk as an out pointer — confirm against filter.h before calling.
  [PreserveSig]
  int BindRegion(
    ref FILTERREGION origPos,
    ref Guid riid,
    ref object ppunk);
}
|  | ||||
|  | ||||
| } | ||||
		Reference in New Issue
	
	Block a user