Module pydsc

Source Code for Module pydsc

  1  # Copyright 2004 Roman Yakovenko. 
  2  # Distributed under the Boost Software License, Version 1.0. (See 
  3  # accompanying file LICENSE_1_0.txt or copy at 
  4  # http://www.boost.org/LICENSE_1_0.txt) 
  5   
  6  """ 
  7  Python Documentation Spell Checker. 
  8   
  9  The pydsc module contains functionality needed to check documentation strings 
 10  and comments for spelling errors, within Python code. The pydsc module depends 
 11  on PyEnchant spell checker. PyEnchant provides interface for different spell 
 12  engines: 
 13      * ispell 
 14      * aspell 
 15      * OpenOffice 
 16   
 17  The use of the pydsc module is very simple - just import pydsc and all modules 
 18  that will be imported after it will be checked. By default all spelling errors 
 19  will be printed to sys.stdout. The pydsc checker could be customized in many 
 20  different ways: 
 21      * you can define set of files/directories that should be included/excluded 
 22        from check process 
 23      * you can redefine error messages destination 
 24      * you can redefine and/or re-configure the spell checker
 25   
 26  Install: 
 27  python setup.py install 
 28   
 29  Usage example: 
 30   
 31  import pydsc 
 32  import readline #errors will be printed to standard output
 33   
 34  more complex example ( taken from pygccxml project ): 
 35   
 36  import pydsc 
 37  #check only pygccxml package 
 38  pydsc.include( r'D:\pygccxml_sources\sources\pygccxml_dev' ) 
 39  pydsc.ignore( [ 'Yakovenko' 
 40      , 'Bierbaum' 
 41      , 'org' 
 42      , 'http' 
 43      , 'bool' 
 44      , 'str' 
 45      , 'www' 
 46      , 'param' 
 47      , 'txt' 
 48      , 'decl' 
 49      , 'decls' 
 50      , 'namespace' 
 51      , 'namespaces' 
 52      , 'enum' 
 53      , 'const' 
 54      , 'GCC' 
 55      , 'xcc' 
 56      , 'TODO' 
 57      , 'typedef' 
 58      , 'os' 
 59      , 'normcase' 
 60      , 'normpath' 
 61      , 'scopedef' 
 62      , 'ira'#part of Matthias mail address 
 63      , 'uka'#part of Matthias mail address 
 64      , 'de'#part of Matthias mail address 
 65      , 'dat'#file extension of directory cache 
 66      , 'config'#parameter description 
 67      , 'gccxml'#parameter description 
 68      , 'Py++' 
 69      , 'pygccxml' 
 70      , 'calldef' 
 71      , 'XXX' 
 72      , 'wstring' 
 73      , 'py' ] ) 
 74  """ 
 75   
 76  __version__ = '0.2' #current version 
 77  __author__ = 'Roman Yakovenko <roman.yakovenko@gmail.com>' #Don't you want to know who is guilty? 
 78  __url__ = 'http://www.language-binding.net' #project home 
 79  __license__ = 'Boost Software License <http://boost.org/more/license_info.html>' #license 
 80   
 81  import os 
 82  import sys 
 83  import pprint 
 84  import inspect 
 85  import __builtin__ 
 86  from enchant import checker 


# TODO: support source code encoding declarations, for example
#   "-*- coding: <encoding> -*-" or "-*- coding: iso-8859-15 -*-"

def normalize_path(some_path):
    """
    Returns the canonical form of a path.

    The path is first collapsed with os.path.normpath and the result is then
    passed through os.path.normcase.

    @param some_path: path to a file or a directory
    @type some_path: str

    @return: str
    """
    collapsed = os.path.normpath(some_path)
    return os.path.normcase(collapsed)
96
class filter_by_path_t:
    """
    Path based filter, used to decide which modules should be spell checked.

    The instance keeps a list of normalized file and/or directory paths and a
    filter type. A source file "matches" if it is listed itself or if it is
    located under one of the listed directories. For the INCLUDE filter only
    matching files are checked, for the EXCLUDE filter only the others are.
    """

    class FILTER_TYPE:
        """defines few filter constants"""
        INCLUDE = 'include'
        EXCLUDE = 'exclude'

    def __init__(self, what, ftype):
        """
        @param what: path or list of paths, could contain file and directory
                     names. None is treated as an empty list.
        @type what: str or [ str ]

        @param ftype: FILTER_TYPE constant
        """
        if what is None:
            what = []
        elif isinstance(what, str):
            what = [what]
        # A real list is stored (not a lazy iterator), because the paths are
        # scanned again on every call to check().
        self.what = [normalize_path(path) for path in what]
        self.ftype = ftype

    @staticmethod
    def contains_parent_dir(path, dirs):
        """
        returns True if one of the directories is equal to the path or is a
        parent directory of the path, False otherwise

        The comparison respects path component boundaries: "src/pkg" is a
        parent of "src/pkg/m.py", but not of "src/pkg_extra/m.py".

        @param path: path
        @type path: str

        @param dirs: list of directories and/or files
        @type dirs: [ str ]

        @return: bool
        """
        #precondition: dirs and path should be normalize_path'ed before calling this function
        for candidate in dirs:
            if path == candidate:
                return True
            prefix = candidate
            if not prefix.endswith(os.sep):
                # Without the separator a plain prefix test would also match
                # sibling names that merely start with the same characters.
                prefix = prefix + os.sep
            if path.startswith(prefix):
                return True
        return False

    def check(self, source_file):
        """
        returns True if source_file should be checked, False otherwise

        @param source_file: path of the module source file; it is normalized
                            before it is compared with the filter paths
        @type source_file: str

        @return: bool
        """
        source_file = normalize_path(source_file)
        matched = source_file in self.what \
                  or self.contains_parent_dir(source_file, self.what)
        if matched:
            return self.ftype == self.FILTER_TYPE.INCLUDE
        return self.ftype == self.FILTER_TYPE.EXCLUDE
142
class checker_t(object):
    """
    applies spell check process on every imported module

    This is the main class of this module. This class applies spell check
    process on every imported module. Every documentation string within the
    module will be checked. Some comments will be checked too. You should read
    inspect module documentation, in order to find out which comments will be
    checked.
    """

    def __init__(self
                 , speller
                 , writer=None
                 , filter=None
                 , ignore_identifiers=True):
        """
        initialization method

        During this method, reference to __builtin__.__import__ function is
        saved in one of the members of the class, and replaced with import_
        member function.

        @param speller: reference to enchant.checker.SpellChecker object
        @type speller: enchant.checker.SpellChecker

        @param writer: reference to instance of class that has write method.
                       By default sys.stdout will be used.

        @param filter: object with a check( source_file ) method that returns
                       True for the files that should be checked. If it is
                       None, every module is checked.

        @param ignore_identifiers: often comments/documentation strings contains
                                   class/method/function names. Those names,
                                   usually introduce spell error. If ignore_identifiers
                                   set to True, those names will be ignored.
        @type ignore_identifiers: bool
        """
        object.__init__(self)
        self.__checked = set()
        self.__already_imported = set(sys.modules.keys())
        self.speller = speller
        self.writer = writer
        if self.writer is None:
            self.writer = sys.stdout
        self.filter = filter
        self.ignored_words = set()
        self.ignore_identifiers = ignore_identifiers
        # The hook is installed last, so import_ can never be called on an
        # instance whose state is only partially initialized.
        self.__orig_import = __builtin__.__import__
        __builtin__.__import__ = self.import_

    @staticmethod
    def _get_source_file(obj):
        """
        returns name of the Python source file of obj, or None if there is no
        such file

        inspect.getsourcefile raises TypeError for built-in modules, classes
        and functions (and for None); here this is reported as "no source
        file", so a failure to locate sources does not break the import.
        """
        try:
            return inspect.getsourcefile(obj)
        except TypeError:
            return None

    def should_be_checked(self, obj, module=None):
        """
        returns True, if obj should be checked, False otherwise

        @param obj: module or member of a module
        @param module: module that is being checked right now. Members that
                       inspect.getmodule attributes to another module are
                       skipped. It is not used when obj is a module.

        @return: bool
        """
        if id(obj) in self.__checked:
            return False
        if inspect.isbuiltin(obj):
            return False
        if inspect.ismodule(obj):
            if obj.__name__ in self.__already_imported:
                return False #do not check already imported modules
            if not self.filter:
                return True
            try:
                source_file = inspect.getsourcefile(obj)
                if source_file is None:
                    source_file = inspect.getfile(obj)
            except TypeError:
                return False #built in module
            return self.filter.check(source_file)
        if inspect.getmodule(obj) is not module:
            return False
        return bool(inspect.isclass(obj)
                    or inspect.ismethod(obj)
                    or inspect.isfunction(obj)
                    or inspect.isroutine(obj)
                    or inspect.ismethoddescriptor(obj)
                    or inspect.isdatadescriptor(obj))

    def import_(self, name, globals=None, locals=None, fromlist=None, *args, **kwargs):
        """
        Hook to import functionality

        The call is forwarded to the original __import__ function and the
        returned module is spell checked, if needed. Additional arguments
        (for example "level", which interpreter passes for relative and
        absolute imports) are forwarded unchanged.

        @return: the object returned by the original __import__ function
        """
        pymodule = self.__orig_import(name, globals, locals, fromlist, *args, **kwargs)
        if self.should_be_checked(pymodule):
            # The returned module is recorded, not the requested name: for
            # "import a.b" the returned (and checked) module is package "a".
            self.__already_imported.add(pymodule.__name__)
            self.__check(pymodule)
        return pymodule

    def __check_text_impl(self, obj, text, text_type):
        """
        spell checks the text and writes the report for every misspelled word

        @param obj: object the text belongs to
        @param text: text to be checked, nothing is done if it is empty
        @param text_type: description of the text, it is printed in the report
        """
        if not text:
            return
        if self.ignore_identifiers and hasattr(obj, '__name__') and obj.__name__:
            self.ignored_words.add(obj.__name__)
        errors = {}
        self.speller.set_text(text)
        for error in self.speller:
            if error.word in self.ignored_words:
                continue
            errors[error.word] = self.speller.suggest()
        if not errors:
            return
        write = self.writer.write
        source_file = self._get_source_file(inspect.getmodule(obj))
        write(' error details: %s' % os.linesep)
        if source_file:
            write(' file : %s%s' % (source_file, os.linesep))
            try:
                line_no = inspect.getsourcelines(obj)[1]
            except (IOError, TypeError):
                # the source of this object could not be retrieved, the
                # report is written without the line number
                line_no = None
            if line_no is not None:
                write(' line : %d%s' % (line_no, os.linesep))
        write(' text type : %s%s' % (text_type, os.linesep))
        for word, suggestions in errors.items():
            write(' misspelled word: %s%s' % (word, os.linesep))
            write(' suggestions : %s%s' % (repr(suggestions), os.linesep))

    def __check_text(self, obj):
        """
        checks documentation string of obj and, if obj has a source file,
        the comments inspect.getcomments returns for it
        """
        self.__check_text_impl(obj, inspect.getdoc(obj), 'documentation string')
        if self._get_source_file(obj):
            self.__check_text_impl(obj, inspect.getcomments(obj), 'comment')

    def __check(self, module):
        """
        checks the module and, in breadth first order, all its members that
        should_be_checked accepts
        """
        self.__check_text(module)
        to_be_checked = [value for name, value in inspect.getmembers(module)]
        while to_be_checked:
            member = to_be_checked.pop(0)
            if not self.should_be_checked(member, module):
                continue
            self.__check_text(member)
            to_be_checked.extend([value for name, value in inspect.getmembers(member)])
            # remember the object, it could be reachable by more than one name
            self.__checked.add(id(member))

# Module level checker, used by the exclude, include and ignore functions.
# Creating it replaces __builtin__.__import__ (see checker_t.__init__), so
# the modules that are imported from now on are spell checked.
"""documentation spell checker instance"""
doc_checker = checker_t(speller=checker.SpellChecker("en_US"))

def exclude(what):
    """
    Convenience function. Replaces the filter of doc_checker with an EXCLUDE
    filter: modules, that their source file or parent directory belongs to
    "what", will not be checked.

    @param what: list of paths, could contain file and directory names
    """
    exclude_type = filter_by_path_t.FILTER_TYPE.EXCLUDE
    doc_checker.filter = filter_by_path_t(what, exclude_type)
293
def include(what):
    """
    Convenience function. Replaces the filter of doc_checker with an INCLUDE
    filter: only modules, that their source file or parent directory belongs
    to "what", will be checked.

    @param what: list of paths, could contain file and directory names
    """
    include_type = filter_by_path_t.FILTER_TYPE.INCLUDE
    doc_checker.filter = filter_by_path_t(what, include_type)
302
def ignore(what, case_sensitive=False):
    """
    Adds word or list of words to the ignore list of doc_checker.

    The checker looks a misspelled word up in the ignore list by exact match.
    For this reason the word is always registered as it was given; if
    case_sensitive is False, its lower case form is registered too.

    @param what: word(string) or list of words(strings) to be ignored.
    @type what: str or [ str ]

    @param case_sensitive: if True, only the given spelling of the word is
                           ignored
    @type case_sensitive: bool
    """
    if isinstance(what, str):
        what = [what]
    for word in what:
        doc_checker.ignored_words.add(word)
        if not case_sensitive:
            doc_checker.ignored_words.add(word.lower())
317