from mkdocs.plugins import BasePlugin
from mkdocs.config import config_options
from xml.etree import ElementTree as ET
import os
import re


class RssDeduplicatePlugin(BasePlugin):
    """MkDocs plugin that post-processes the RSS feed files found in ``site_dir``.

    After the build, ``<item>`` entries that repeat an already seen ``<title>``
    are removed from each feed, and the literal ``{ width="45%" }`` snippet is
    stripped from the item descriptions. Both feeds are rewritten in place.
    """

    # Feed files, relative to ``site_dir``, that are rewritten in place.
    FEED_FILENAMES = ('feed_rss_created.xml', 'feed_rss_updated.xml')

    # Snippet removed from descriptions. The braces are escaped so they are
    # always read as literal characters; compiled once instead of per item.
    _WIDTH_ATTR_RE = re.compile(r'\{\s*width="45%"\s*\}')

    def on_post_build(self, config, **kwargs):
        """Deduplicate and clean every feed file after the site has been built.

        Args:
            config: The MkDocs config; only ``config['site_dir']`` is read.
            **kwargs: Accepted and ignored, so extra keyword arguments passed
                with the event do not break the hook.
        """
        for filename in self.FEED_FILENAMES:
            feed_path = os.path.join(config['site_dir'], filename)
            try:
                self.remove_duplicates_from_rss(feed_path)
                self.clean_image_tags(feed_path)
            except FileNotFoundError:
                # A feed that was not generated must not fail the whole build.
                print(f"Rss_Deduplicate: Feed '{feed_path}' not found, skipping.")

    def clean_image_tags(self, feed_path):
        """Strip the ``{ width="45%" }`` snippet from every item description.

        Items without a ``<description>`` element, or with an empty one, are
        left untouched.

        Args:
            feed_path (str): Path of the RSS file; it is rewritten in place.

        Raises:
            FileNotFoundError: If ``feed_path`` does not exist.
            xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        """
        tree = ET.parse(feed_path)

        for item in tree.getroot().iterfind('./channel/item'):
            description = item.find('description')
            # ``text`` is None for an empty element; re.sub() would raise on it.
            if description is None or not description.text:
                continue
            description.text = self._WIDTH_ATTR_RE.sub('', description.text)

        tree.write(feed_path, encoding='utf-8', xml_declaration=True)

    def remove_duplicates_from_rss(self, feed_path):
        """Remove items whose ``<title>`` text has already been seen in the feed.

        The first item carrying a given title is kept and later ones are
        removed. Items without a title (missing or empty element) are always
        kept, because they cannot be identified as duplicates of each other.

        Args:
            feed_path (str): Path of the RSS file; it is rewritten in place.

        Raises:
            FileNotFoundError: If ``feed_path`` does not exist.
            xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        """
        tree = ET.parse(feed_path)

        seen_titles = set()
        for channel in tree.getroot().findall('./channel'):
            # findall() returns a list, so removing children from ``channel``
            # while looping over that list is safe. Removing from the item's
            # own channel avoids a ValueError for items of a later channel.
            for item in channel.findall('item'):
                title = item.findtext('title')
                if not title:
                    continue
                if title in seen_titles:
                    channel.remove(item)
                else:
                    seen_titles.add(title)

        # Save the deduplicated XML to the same file
        tree.write(feed_path, encoding='utf-8', xml_declaration=True)
from mkdocs.plugins import BasePlugin
from mkdocs.config import config_options
from xml.etree import ElementTree as ET
import os
import re


class RssDeduplicatePlugin(BasePlugin):
    """MkDocs plugin that post-processes the RSS feed files found in ``site_dir``.

    After the build, ``<item>`` entries that repeat an already seen ``<title>``
    are removed from each feed, and the literal ``{ width="45%" }`` snippet is
    stripped from the item descriptions. Both feeds are rewritten in place.
    """

    # Feed files, relative to ``site_dir``, that are rewritten in place.
    FEED_FILENAMES = ('feed_rss_created.xml', 'feed_rss_updated.xml')

    # Snippet removed from descriptions. The braces are escaped so they are
    # always read as literal characters; compiled once instead of per item.
    _WIDTH_ATTR_RE = re.compile(r'\{\s*width="45%"\s*\}')

    def on_post_build(self, config, **kwargs):
        """Deduplicate and clean every feed file after the site has been built.

        Args:
            config: The MkDocs config; only ``config['site_dir']`` is read.
            **kwargs: Accepted and ignored, so extra keyword arguments passed
                with the event do not break the hook.
        """
        for filename in self.FEED_FILENAMES:
            feed_path = os.path.join(config['site_dir'], filename)
            try:
                self.remove_duplicates_from_rss(feed_path)
                self.clean_image_tags(feed_path)
            except FileNotFoundError:
                # A feed that was not generated must not fail the whole build.
                print(f"Rss_Deduplicate: Feed '{feed_path}' not found, skipping.")

    def clean_image_tags(self, feed_path):
        """Strip the ``{ width="45%" }`` snippet from every item description.

        Items without a ``<description>`` element, or with an empty one, are
        left untouched.

        Args:
            feed_path (str): Path of the RSS file; it is rewritten in place.

        Raises:
            FileNotFoundError: If ``feed_path`` does not exist.
            xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        """
        tree = ET.parse(feed_path)

        for item in tree.getroot().iterfind('./channel/item'):
            description = item.find('description')
            # ``text`` is None for an empty element; re.sub() would raise on it.
            if description is None or not description.text:
                continue
            description.text = self._WIDTH_ATTR_RE.sub('', description.text)

        tree.write(feed_path, encoding='utf-8', xml_declaration=True)

    def remove_duplicates_from_rss(self, feed_path):
        """Remove items whose ``<title>`` text has already been seen in the feed.

        The first item carrying a given title is kept and later ones are
        removed. Items without a title (missing or empty element) are always
        kept, because they cannot be identified as duplicates of each other.

        Args:
            feed_path (str): Path of the RSS file; it is rewritten in place.

        Raises:
            FileNotFoundError: If ``feed_path`` does not exist.
            xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        """
        tree = ET.parse(feed_path)

        seen_titles = set()
        for channel in tree.getroot().findall('./channel'):
            # findall() returns a list, so removing children from ``channel``
            # while looping over that list is safe. Removing from the item's
            # own channel avoids a ValueError for items of a later channel.
            for item in channel.findall('item'):
                title = item.findtext('title')
                if not title:
                    continue
                if title in seen_titles:
                    channel.remove(item)
                else:
                    seen_titles.add(title)

        # Save the deduplicated XML to the same file
        tree.write(feed_path, encoding='utf-8', xml_declaration=True)
+ diff --git a/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/SOURCES.txt b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/SOURCES.txt new file mode 100755 index 0000000000000000000000000000000000000000..eda862a77e15b344b6d2425de5dff5a501ba2e22 --- /dev/null +++ b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/SOURCES.txt @@ -0,0 +1,9 @@ +setup.py +mkdocs_rss_deduplicate/__init__.py +mkdocs_rss_deduplicate/rss_deduplicate.py +rss_deduplicate.egg-info/PKG-INFO +rss_deduplicate.egg-info/SOURCES.txt +rss_deduplicate.egg-info/dependency_links.txt +rss_deduplicate.egg-info/entry_points.txt +rss_deduplicate.egg-info/requires.txt +rss_deduplicate.egg-info/top_level.txt \ No newline at end of file diff --git a/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/dependency_links.txt b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/dependency_links.txt new file mode 100755 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/entry_points.txt b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/entry_points.txt new file mode 100755 index 0000000000000000000000000000000000000000..5cb292d02ea347d09aec5e1b66fafcec70863ccb --- /dev/null +++ b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[mkdocs.plugins] +rss_deduplicate = mkdocs_rss_deduplicate.rss_deduplicate:RssDeduplicatePlugin diff --git a/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/requires.txt b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/requires.txt new file mode 100755 index 0000000000000000000000000000000000000000..b135365ab27b7a14a9fc8323e8eac4f249d050df --- /dev/null +++ 
b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/requires.txt @@ -0,0 +1 @@ +mkdocs>=1.0.4 diff --git a/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/top_level.txt b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/top_level.txt new file mode 100755 index 0000000000000000000000000000000000000000..1651e24c4784bd7bd6f4580920f780614998925a --- /dev/null +++ b/customPlugins/mkdocs-rss-deduplicate/rss_deduplicate.egg-info/top_level.txt @@ -0,0 +1 @@ +mkdocs_rss_deduplicate diff --git a/customPlugins/mkdocs-rss-deduplicate/setup.py b/customPlugins/mkdocs-rss-deduplicate/setup.py new file mode 100755 index 0000000000000000000000000000000000000000..4f9d2114a02de4fec9e3b053681ae2089f6a0e72 --- /dev/null +++ b/customPlugins/mkdocs-rss-deduplicate/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup, find_packages + +setup( + name='rss_deduplicate', + version='0.1', + packages=find_packages(), + include_package_data=True, + install_requires=[ + 'mkdocs>=1.0.4' + ], + entry_points={ + 'mkdocs.plugins': [ + 'rss_deduplicate = mkdocs_rss_deduplicate.rss_deduplicate:RssDeduplicatePlugin', + ] + }, + author='tb', + description='An MkDocs plugin to deduplicate RSS feed items (generated by the i18n-plugin) and clean up image tags post-build.', + long_description=""" + This MkDocs plugin performs two key functions to enhance the quality of RSS feeds generated from MkDocs sites: + + 1. Deduplicates entries in the RSS feed that may arise due to localization (i18n) or other site generation artifacts, ensuring each item is unique. + + 2. Cleans up image tags within the RSS feed descriptions to remove any site-specific markup or styling that is not compatible with RSS feed standards or display conventions in common RSS readers. + + This ensures that the RSS feeds are clean, concise, and compliant with RSS specifications, providing a better experience for subscribers of the feed. 
from mkdocs.plugins import BasePlugin, event_priority
import json
import os


class ExcludeStandaloneSnippetsPlugin(BasePlugin):
    """
    A MkDocs plugin that excludes standalone snippet pages not listed in the site's navigation from the search index.
    """

    @staticmethod
    def _strip_md_suffix(path):
        """Return ``path`` without a trailing ``.md`` extension, if it has one."""
        # str.rstrip('.md') would strip any run of '.', 'm' and 'd' characters
        # ("cmd" -> "c", "add.md" -> "a"), so the suffix is removed explicitly.
        return path[:-len('.md')] if path.endswith('.md') else path

    def flatten_nav(self, nav_items, parent_dir=''):
        """
        Recursively flattens the navigation structure to a list of Markdown file paths.

        Entries that are not strings ending in ``.md`` (for example external
        links) are ignored.

        Args:
            nav_items (list): The navigation items to process, which can be a mix of dictionaries (for nested navigation) and strings (for direct links).
            parent_dir (str): Directory path joined in front of every Markdown path that is found.

        Returns:
            list: A list of strings, where each string is a path to a Markdown file included in the site's navigation.
        """
        pages = []
        for item in nav_items:
            # A dict maps a title to either a page or a nested section; a bare
            # string is an untitled page.
            candidates = item.values() if isinstance(item, dict) else [item]
            for candidate in candidates:
                if isinstance(item, dict) and isinstance(candidate, list):
                    pages.extend(self.flatten_nav(candidate, parent_dir))
                elif isinstance(candidate, str) and candidate.endswith('.md'):
                    pages.append(os.path.join(parent_dir, candidate))
        return pages

    @event_priority(-100)  # Run this plugin's on_post_build event last
    def on_post_build(self, config, **kwargs):
        """
        The method called by MkDocs after the site has been built, to filter out standalone snippets from the search index.

        Nothing is changed when no navigation is configured or when
        ``search/search_index.json`` does not exist; the file is only rewritten
        when at least one document was filtered out.

        Args:
            config (dict): The MkDocs config object; ``config['nav']`` and ``config['site_dir']`` are read.
        """
        nav = config['nav']
        if not nav:
            # Without an explicit nav every document would count as standalone
            # and the whole index would be emptied.
            print("Exclude_Snippets: No 'nav' configured, leaving search_index.json untouched.")
            return

        search_index_path = os.path.join(config['site_dir'], 'search', 'search_index.json')
        try:
            with open(search_index_path, 'r', encoding='utf-8') as file:
                search_index = json.load(file)
        except FileNotFoundError:
            print(f"Exclude_Snippets: '{search_index_path}' not found, nothing to filter.")
            return

        # Navigation paths without their extension. A set gives O(1) membership
        # tests, and the tuple lets a single startswith() test every prefix.
        nav_paths = {os.path.splitext(page)[0] for page in self.flatten_nav(nav)}
        nav_prefixes = tuple(path + '/' for path in nav_paths)

        # MkDocs serves index.md / README.md at the URL of the containing
        # directory, so "guide/index" in the nav appears as "guide" (and the
        # home page as "") in the search index. These are matched exactly only.
        index_dirs = {
            path.rpartition('/')[0]
            for path in nav_paths
            if path.rpartition('/')[2] in ('index', 'README')
        }

        docs = search_index['docs']
        filtered_docs = []
        for doc in docs:
            # Normalize the document's location for comparison, removing localization and section identifiers
            parts = doc['location'].split('/')
            kept_parts = [part for part in parts if not part.startswith('#') and part != 'en']
            doc_path = '/'.join(kept_parts).rstrip('/').replace('.md', '')

            # Include the document if its normalized path matches any navigation path
            if doc_path in nav_paths or doc_path in index_dirs or doc_path.startswith(nav_prefixes):
                filtered_docs.append(doc)

        # Update the search index if any documents were excluded
        if len(filtered_docs) != len(docs):
            search_index['docs'] = filtered_docs
            with open(search_index_path, 'w', encoding='utf-8') as file:
                json.dump(search_index, file)
            print("Exclude_Snippets: Successfully updated search_index.json with filtered documents.")

    def is_standalone_snippet(self, doc, navigation_pages):
        """
        Determines if a document should be excluded from the search index.

        The document's location, with a trailing '.md' extension removed, is
        looked up in ``navigation_pages``.

        Args:
            doc (dict): A dictionary representing a document in the search index, where
                        'location' is a key pointing to the document's path.
            navigation_pages (list): Paths of the documents included in the site's
                                     navigation, without their '.md' extension.

        Returns:
            bool: True if the document is not found in the navigation pages and should be
                  excluded from the search index; False otherwise.
        """
        normalized_doc_path = self._strip_md_suffix(doc['location'])
        return normalized_doc_path not in navigation_pages
file mode 100755 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/entry_points.txt b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/entry_points.txt new file mode 100755 index 0000000000000000000000000000000000000000..e458d2f7a9b22c7f4358a4c97fea3a1efd694ac6 --- /dev/null +++ b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[mkdocs.plugins] +exclude_snippets = mkdocs_exclude_snippets.exclude_snippets_plugin:ExcludeStandaloneSnippetsPlugin diff --git a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/requires.txt b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/requires.txt new file mode 100755 index 0000000000000000000000000000000000000000..d912869d8f9df5705f8ca2b5dc34bd5cd41a7ffc --- /dev/null +++ b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/requires.txt @@ -0,0 +1,2 @@ +mkdocs +pyyaml diff --git a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/top_level.txt b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/top_level.txt new file mode 100755 index 0000000000000000000000000000000000000000..12de9a89597871f1f0caa0e52364fd58ea55a684 --- /dev/null +++ b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets.egg-info/top_level.txt @@ -0,0 +1 @@ +mkdocs_exclude_snippets diff --git a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__init__.py b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/__init__.cpython-310.pyc b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7a728b0594613b716c96437833f1da151edb280 Binary files /dev/null and b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/__init__.cpython-310.pyc differ diff --git a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/__init__.cpython-311.pyc b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/__init__.cpython-311.pyc new file mode 100755 index 0000000000000000000000000000000000000000..6f663430ae53d28aa1b27bc1dce79669837fa27c Binary files /dev/null and b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/__init__.cpython-311.pyc differ diff --git a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/exclude_snippets_plugin.cpython-310.pyc b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/exclude_snippets_plugin.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..309803a909356ed964b2e67f49cc9aaa9b9e2a86 Binary files /dev/null and b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/exclude_snippets_plugin.cpython-310.pyc differ diff --git a/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/exclude_snippets_plugin.cpython-311.pyc b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/exclude_snippets_plugin.cpython-311.pyc new file mode 100755 index 0000000000000000000000000000000000000000..50a5190a4802b7ad596c5c0a0a7d4cc33388af2d Binary files /dev/null and b/customPlugins/mkdocs_exclude_snippets/mkdocs_exclude_snippets/__pycache__/exclude_snippets_plugin.cpython-311.pyc differ diff --git 
from mkdocs.plugins import BasePlugin, event_priority
import json
import os


class ExcludeStandaloneSnippetsPlugin(BasePlugin):
    """
    A MkDocs plugin that excludes standalone snippet pages not listed in the site's navigation from the search index.
    """

    @staticmethod
    def _strip_md_suffix(path):
        """Return ``path`` without a trailing ``.md`` extension, if it has one."""
        # str.rstrip('.md') would strip any run of '.', 'm' and 'd' characters
        # ("cmd" -> "c", "add.md" -> "a"), so the suffix is removed explicitly.
        return path[:-len('.md')] if path.endswith('.md') else path

    def flatten_nav(self, nav_items, parent_dir=''):
        """
        Recursively flattens the navigation structure to a list of Markdown file paths.

        Entries that are not strings ending in ``.md`` (for example external
        links) are ignored.

        Args:
            nav_items (list): The navigation items to process, which can be a mix of dictionaries (for nested navigation) and strings (for direct links).
            parent_dir (str): Directory path joined in front of every Markdown path that is found.

        Returns:
            list: A list of strings, where each string is a path to a Markdown file included in the site's navigation.
        """
        pages = []
        for item in nav_items:
            # A dict maps a title to either a page or a nested section; a bare
            # string is an untitled page.
            candidates = item.values() if isinstance(item, dict) else [item]
            for candidate in candidates:
                if isinstance(item, dict) and isinstance(candidate, list):
                    pages.extend(self.flatten_nav(candidate, parent_dir))
                elif isinstance(candidate, str) and candidate.endswith('.md'):
                    pages.append(os.path.join(parent_dir, candidate))
        return pages

    @event_priority(-100)  # Run this plugin's on_post_build event last
    def on_post_build(self, config, **kwargs):
        """
        The method called by MkDocs after the site has been built, to filter out standalone snippets from the search index.

        Nothing is changed when no navigation is configured or when
        ``search/search_index.json`` does not exist; the file is only rewritten
        when at least one document was filtered out.

        Args:
            config (dict): The MkDocs config object; ``config['nav']`` and ``config['site_dir']`` are read.
        """
        nav = config['nav']
        if not nav:
            # Without an explicit nav every document would count as standalone
            # and the whole index would be emptied.
            print("Exclude_Snippets: No 'nav' configured, leaving search_index.json untouched.")
            return

        search_index_path = os.path.join(config['site_dir'], 'search', 'search_index.json')
        try:
            with open(search_index_path, 'r', encoding='utf-8') as file:
                search_index = json.load(file)
        except FileNotFoundError:
            print(f"Exclude_Snippets: '{search_index_path}' not found, nothing to filter.")
            return

        # Navigation paths without their extension. A set gives O(1) membership
        # tests, and the tuple lets a single startswith() test every prefix.
        nav_paths = {os.path.splitext(page)[0] for page in self.flatten_nav(nav)}
        nav_prefixes = tuple(path + '/' for path in nav_paths)

        # MkDocs serves index.md / README.md at the URL of the containing
        # directory, so "guide/index" in the nav appears as "guide" (and the
        # home page as "") in the search index. These are matched exactly only.
        index_dirs = {
            path.rpartition('/')[0]
            for path in nav_paths
            if path.rpartition('/')[2] in ('index', 'README')
        }

        docs = search_index['docs']
        filtered_docs = []
        for doc in docs:
            # Normalize the document's location for comparison, removing localization and section identifiers
            parts = doc['location'].split('/')
            kept_parts = [part for part in parts if not part.startswith('#') and part != 'en']
            doc_path = '/'.join(kept_parts).rstrip('/').replace('.md', '')

            # Include the document if its normalized path matches any navigation path
            if doc_path in nav_paths or doc_path in index_dirs or doc_path.startswith(nav_prefixes):
                filtered_docs.append(doc)

        # Update the search index if any documents were excluded
        if len(filtered_docs) != len(docs):
            search_index['docs'] = filtered_docs
            with open(search_index_path, 'w', encoding='utf-8') as file:
                json.dump(search_index, file)
            print("Exclude_Snippets: Successfully updated search_index.json with filtered documents.")

    def is_standalone_snippet(self, doc, navigation_pages):
        """
        Determines if a document should be excluded from the search index.

        The document's location, with a trailing '.md' extension removed, is
        looked up in ``navigation_pages``.

        Args:
            doc (dict): A dictionary representing a document in the search index, where
                        'location' is a key pointing to the document's path.
            navigation_pages (list): Paths of the documents included in the site's
                                     navigation, without their '.md' extension.

        Returns:
            bool: True if the document is not found in the navigation pages and should be
                  excluded from the search index; False otherwise.
        """
        normalized_doc_path = self._strip_md_suffix(doc['location'])
        return normalized_doc_path not in navigation_pages
+paginate==0.5.6 +pandas==2.1.2 +pathspec==0.11.2 +pendulum==2.1.2 +platformdirs==3.11.0 +Pygments==2.16.1 +pymdown-extensions==10.3.1 +python-dateutil==2.8.2 +pytz==2023.3.post1 +pytzdata==2020.1 +PyYAML==6.0.1 +pyyaml_env_tag==0.1 +regex==2023.10.3 +requests==2.31.0 +six==1.16.0 +smmap==5.0.1 +tabulate==0.9.0 +tzdata==2023.3 +urllib3==2.0.7 +watchdog==3.0.0 +wcmatch==8.5.2 +-e ./customPlugins/mkdocs_exclude_snippets +-e ./customPlugins/mkdocs-rss-deduplicate