|
| 1 | +# vim: set filetype=python fileencoding=utf-8: |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +#============================================================================# |
| 5 | +# # |
| 6 | +# Licensed under the Apache License, Version 2.0 (the "License"); # |
| 7 | +# you may not use this file except in compliance with the License. # |
| 8 | +# You may obtain a copy of the License at # |
| 9 | +# # |
| 10 | +# http://www.apache.org/licenses/LICENSE-2.0 # |
| 11 | +# # |
| 12 | +# Unless required by applicable law or agreed to in writing, software # |
| 13 | +# distributed under the License is distributed on an "AS IS" BASIS, # |
| 14 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # |
| 15 | +# See the License for the specific language governing permissions and # |
| 16 | +# limitations under the License. # |
| 17 | +# # |
| 18 | +#============================================================================# |
| 19 | + |
| 20 | + |
| 21 | +''' Pydoctor detection and metadata extraction. ''' |
| 22 | + |
| 23 | + |
| 24 | +from urllib.parse import ParseResult as _Url |
| 25 | + |
| 26 | +from . import __ |
| 27 | +from . import extraction as _extraction |
| 28 | +from . import urls as _urls |
| 29 | + |
| 30 | + |
| 31 | +_scribe = __.acquire_scribe( __name__ ) |
| 32 | + |
| 33 | + |
class PydoctorDetection( __.StructureDetection ):
    ''' Structure detection outcome for a Pydoctor documentation source.

        Declares the processor capabilities, offers a constructor which
        delegates detection to a processor, and extracts content through
        the sibling ``extraction`` module.
    '''

    source: str
    normalized_source: str = ''

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' Reports what the Pydoctor structure processor supports.

            A single inventory type, ``pydoctor``, is advertised with a
            confidence of 1.0, along with signature, description, and
            code example extraction.
        '''
        inventory_type = 'pydoctor'
        features = frozenset( (
            __.ContentExtractionFeatures.Signatures,
            __.ContentExtractionFeatures.Descriptions,
            __.ContentExtractionFeatures.CodeExamples,
        ) )
        confidences = __.immut.Dictionary( { inventory_type: 1.0 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = frozenset( ( inventory_type, ) ),
            content_extraction_features = features,
            confidence_by_inventory_type = confidences )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Produces detection by asking processor to inspect source.

            Whatever ``processor.detect`` returns is handed back as-is;
            the cast only adjusts the static type.
        '''
        return __.typx.cast(
            __.typx.Self, await processor.detect( auxdata, source ) )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Retrieves content documents for the given inventory objects.

            Uses the ``source`` argument rather than the instance
            attribute and returns the extracted documents as a tuple.
        '''
        return tuple( await _extraction.extract_contents(
            auxdata, source, objects ) )
| 76 | + |
| 77 | + |
async def detect_pydoctor(
    auxdata: __.ApplicationGlobals, base_url: _Url
) -> float:
    ''' Scores how likely the site at base URL is Pydoctor output.

        Retrieves the index page derived from ``base_url`` and searches
        its lowercased HTML for known markers. Returns a confidence
        between 0.0 and 1.0; a failed retrieval is logged at debug level
        and yields 0.0.
    '''
    index_url = _urls.derive_index_url( base_url )
    try:
        html_content = await __.retrieve_url_as_text(
            auxdata.content_cache,
            index_url, duration_max = 10.0 )
    except Exception as exc:
        # Best-effort probe: an unreachable page means no evidence.
        _scribe.debug( f"Detection failed for {base_url.geturl( )}: {exc}" )
        return 0.0
    page = html_content.lower( )
    # Mutually exclusive markers, strongest first; only the first match
    # contributes: generator meta tag, stylesheet name, then navbar classes.
    markers = (
        ( '<meta name="generator" content="pydoctor', 1.0 ),
        ( 'apidocs.css', 0.8 ),
        ( 'navbar navbar-default mainnavbar', 0.3 ),
    )
    score = next(
        ( weight for marker, weight in markers if marker in page ), 0.0 )
    # Docstring markup is an independent signal added on top of the score
    # above, which is why the total is capped on return.
    if 'class="docstring"' in page:
        score += 0.2
    return min( score, 1.0 )
0 commit comments