1
2
3
4
5
6 """
7 Python Documentation Spell Checker.
8
9 The pydsc module contains functionality needed to check documentation strings
10 and comments for spelling errors, within Python code. The pydsc module depends
11 on the PyEnchant spell checker. PyEnchant provides an interface for different spell
12 engines:
13 * ispell
14 * aspell
15 * OpenOffice
16
17 The use of the pydsc module is very simple - just import pydsc and all modules
18 that will be imported after it will be checked. By default all spelling errors
19 will be printed to sys.stdout. The pydsc checker could be customized in many
20 different ways:
21 * you can define set of files/directories that should be included/excluded
22 from check process
23 * you can redefine error messages destination
24 * you can redefine and/or reconfigure the spell checker
25
26 Install:
27 python setup.py install
28
29 Usage example:
30
31 import pydsc
32 import readline #errors will be printed to standard output
33
34 more complex example ( taken from pygccxml project ):
35
36 import pydsc
37 #check only pygccxml package
38 pydsc.include( r'D:\pygccxml_sources\sources\pygccxml_dev' )
39 pydsc.ignore( [ 'Yakovenko'
40 , 'Bierbaum'
41 , 'org'
42 , 'http'
43 , 'bool'
44 , 'str'
45 , 'www'
46 , 'param'
47 , 'txt'
48 , 'decl'
49 , 'decls'
50 , 'namespace'
51 , 'namespaces'
52 , 'enum'
53 , 'const'
54 , 'GCC'
55 , 'xcc'
56 , 'TODO'
57 , 'typedef'
58 , 'os'
59 , 'normcase'
60 , 'normpath'
61 , 'scopedef'
62 , 'ira'#part of Matthias mail address
63 , 'uka'#part of Matthias mail address
64 , 'de'#part of Matthias mail address
65 , 'dat'#file extension of directory cache
66 , 'config'#parameter description
67 , 'gccxml'#parameter description
68 , 'Py++'
69 , 'pygccxml'
70 , 'calldef'
71 , 'XXX'
72 , 'wstring'
73 , 'py' ] )
74 """
75
# Module metadata: who wrote it, where it lives, how it is licensed and
# which release this is.
__author__ = "Roman Yakovenko <roman.yakovenko@gmail.com>"
__url__ = "http://www.language-binding.net"
__license__ = "Boost Software License <http://boost.org/more/license_info.html>"
__version__ = "0.2"
80
81 import os
82 import sys
83 import pprint
84 import inspect
85 import __builtin__
86 from enchant import checker
94 """return os.path.normcase( os.path.normpath( some_path ) )"""
95 return os.path.normcase( os.path.normpath( some_path ) )
96
98 """The instance of this class will help user to define filter, that will
99 exclude modules from being checked"""
101 """defines few filter constants"""
102 INCLUDE = 'include'
103 EXCLUDE = 'exclude'
104
106 """
107 what - list of paths, could contain file and directory names
108 ftype - FILTER_TYPE constant
109 """
110 self.what = what
111 if None is self.what:
112 self.what = []
113 elif isinstance( self.what, str ):
114 self.what = [self.what]
115 self.what = map( normalize_path, self.what )
116 self.ftype = ftype
117
118 @staticmethod
120 """
121 returns true if one of the directories is root directory for the path,
122 false otherwise
123
124 @param path: path
125 @type path: str
126
127 @param dirs: list of directories and\\or files
128 @type dirs: [ str ]
129
130 @return: bool
131 """
132
133 return bool( filter( lambda dir: path.startswith( dir ), dirs ) )
134
135 - def check( self, source_file ):
142
144 """
145 applies spell check process on every imported module
146
147 This is the main class of this module. This class applies spell check
148 process on every imported module. Every documentation string within the
149 module will be checked. Some comments will be checked too. You should read
150 inspect module documentation, in order to find out which comments will be
151 checked.
152 """
153
def __init__( self
              , speller
              , writer=None
              , filter=None
              , ignore_identifiers=True ):
    """
    initialization method

    During this method, reference to __builtin__.__import__ function is
    saved in one of the members of the class, and replaced with import_
    member function. The replacement is done as the very last step, so the
    hook can never run against a partially initialized instance.

    @param speller: reference to enchant.checker.SpellChecker object
    @type speller: enchant.checker.SpellChecker

    @param writer: reference to instance of class that has write method.
                   By default sys.stdout will be used.

    @param filter: object that has check( source_file ) method; the method
                   is asked whether a module, located in source_file,
                   should be checked. If filter is None (or evaluates to
                   False) every newly imported module is checked.

    @param ignore_identifiers: often comments/documentation strings contains
                               class\\method\\function names. Those names,
                               usually introduce spell error. If ignore_identifiers
                               set to True, those names will be ignored.
    @type ignore_identifiers: bool
    """
    object.__init__( self )
    self.speller = speller
    self.writer = writer
    if self.writer is None:
        self.writer = sys.stdout
    self.filter = filter
    self.ignored_words = set()
    self.ignore_identifiers = ignore_identifiers
    #ids of the objects that were already spell checked
    self.__checked = set()
    #modules that were imported before the checker was created are never checked
    self.__already_imported = set( sys.modules.keys() )
    #install the hook only after all the state it relies on is in place
    self.__orig_import = __builtin__.__import__
    __builtin__.__import__ = self.import_
197
199 """returns True, if obj should be checked, False otherwise"""
200
201 if id(obj) in self.__checked:
202 return False
203 if inspect.isbuiltin( obj ):
204 return False
205 if inspect.ismodule( obj ):
206 if obj.__name__ in self.__already_imported:
207 return False
208 if self.filter:
209 try:
210 source_file = inspect.getsourcefile(obj)
211 if source_file is None:
212 source_file = inspect.getfile( obj )
213 return self.filter.check( source_file )
214 except TypeError:
215 return False
216 else:
217 return True
218 obj_module = inspect.getmodule( obj )
219 if not obj_module is module:
220 return False
221 if inspect.isclass( obj ) \
222 or inspect.ismethod( obj ) \
223 or inspect.isfunction( obj ) \
224 or inspect.isroutine( obj ) \
225 or inspect.ismethoddescriptor( obj ) \
226 or inspect.isdatadescriptor( obj ):
227 return True
228 return False
229
def import_( self, name, globals=None, locals=None, fromlist=None, *args, **kwargs ):
    """
    Hook to import functionality

    Delegates the actual import to the original __import__ function and, if
    the returned module should be checked, runs the spell check on it.

    Any additional arguments the interpreter passes to __import__ ( for
    example "level" ) are forwarded untouched, so the hook does not break
    imports that supply them.

    @param name: name of the module to be imported, as given to __import__
    @type name: str

    @return: the module object returned by the original __import__
    """
    pymodule = self.__orig_import( name, globals, locals, fromlist, *args, **kwargs )
    if self.should_be_checked( pymodule ):
        #should_be_checked looks modules up by their __name__, which differs
        #from the requested name for dotted imports ( "import a.b" returns "a" )
        self.__already_imported.add( pymodule.__name__ )
        self.__check( pymodule )
    return pymodule
237
def __check_text_impl( self, obj, text, text_type ):
    """
    spell checks the text and reports the misspelled words to the writer

    @param obj: object the text belongs to; used to report file and line
    @param text: text to be checked; nothing is done if it is empty or None
    @param text_type: description of the text, printed as a part of the report
    @type text_type: str
    """
    if not text:
        return
    if self.ignore_identifiers and hasattr( obj, '__name__' ) and obj.__name__:
        self.ignored_words.add( obj.__name__ )
    errors = {}
    self.speller.set_text( text )
    for error in self.speller:
        word = error.word
        #words could be registered lower-cased ( case insensitive ignore ),
        #so both spellings are looked up
        if word in self.ignored_words or word.lower() in self.ignored_words:
            continue
        errors[ word ] = self.speller.suggest()
    if not errors:
        return
    #location is reported on best effort basis: inspect raises TypeError for
    #objects without Python source and IOError when the source is not available.
    #The check runs within the import hook, so it must not break the import.
    source_file = None
    line_no = None
    try:
        source_file = inspect.getsourcefile( inspect.getmodule( obj ) )
        if source_file:
            line_no = inspect.getsourcelines( obj )[1]
    except ( TypeError, IOError ):
        pass
    write = self.writer.write
    write( ' error details: %s' % os.linesep )
    if source_file:
        write( ' file : %s%s' % ( source_file, os.linesep ) )
        if line_no is not None:
            write( ' line : %d%s' % ( line_no, os.linesep ) )
    write( ' text type : %s%s' % ( text_type, os.linesep ) )
    for word, suggestions in errors.items():
        write( ' misspelled word: %s%s' % ( word, os.linesep ) )
        write( ' suggestions : %s%s' % ( repr( suggestions ), os.linesep ) )
265
def __check_text( self, obj):
    """
    spell checks documentation string of the object and, if a source file
    could be found for the object, the comments that belong to it
    """
    docstring = inspect.getdoc( obj )
    self.__check_text_impl( obj, docstring, 'documentation string' )
    if not inspect.getsourcefile( obj ):
        return
    comments = inspect.getcomments( obj )
    self.__check_text_impl( obj, comments, 'comment' )
270
272 self.__check_text( module )
273 to_be_checked = map( lambda x: x[1], inspect.getmembers( module ) )
274 while to_be_checked:
275 member = to_be_checked.pop(0)
276 if not self.should_be_checked( member, module ):
277 continue
278 self.__check_text( member )
279 to_be_checked.extend( map( lambda x: x[1], inspect.getmembers( member ) ) )
280 self.__checked.add( id(member) )
281
#documentation spell checker instance; it is created when the module is
#imported and spell checks against the "en_US" dictionary
doc_checker = checker_t( speller=checker.SpellChecker( "en_US" ) )
293
302
def ignore( what, case_sensitive=False ):
    """Adds word or list of words to the ignore list.

    what - word(string) or list of words(strings) to be ignored.

    case_sensitive - if False (the default), the lower-cased form of every
                     word is registered too; if True, only the word exactly
                     as given is registered.
    """
    if isinstance( what, str ):
        what = [ what ]
    for word in what:
        #the word is always registered as given, because the checker looks
        #the misspelled word up in the ignore list verbatim
        doc_checker.ignored_words.add( word )
        if not case_sensitive:
            doc_checker.ignored_words.add( word.lower() )
317