Source code for duck.utils.slug
"""
Slug Utilities Module
This module provides various utilities for generating, manipulating, and validating slugs.
A slug is a URL-friendly string, typically used in website URLs to represent titles or categories.
These functions allow for tasks such as slug creation from text, slug-to-text conversion, validation,
cleaning, and various string manipulations specific to slugs.
Functions include:
- slugify: Converts a string to a URL-friendly slug.
- unslugify: Converts a slug back to a human-readable string.
- is_valid_slug: Checks if a string is a valid slug.
- generate_slug_from_string: Generates a slug from a given string.
- clean_slug: Cleans up a slug to ensure it's properly formatted.
- split_slug: Splits a slug into individual words.
- join_slug: Joins a list of words into a slug.
- truncate_slug: Truncates a slug to a specified maximum length.
- sanitize_slug: Sanitizes a slug by removing invalid characters.
These utilities are useful for web developers handling slugs for SEO, URLs, or other string-related tasks.
"""
import re
import unicodedata
[docs]
def slugify(text: str, separator: str = "-") -> str:
"""
Convert a string to a URL-friendly slug.
Args:
text (str): The input string to be converted.
separator (str): The character to replace spaces with (default is "-").
Returns:
str: The generated slug.
"""
text = unicodedata.normalize('NFKD', text) # Normalize Unicode characters
text = text.lower() # Convert to lowercase
text = re.sub(r'[^\w\s-]', '', text) # Remove non-alphanumeric characters except spaces and hyphens
text = re.sub(r'[\s-]+', separator, text) # Replace spaces and hyphens with separator
text = text.strip(separator) # Remove leading/trailing separator
return text
[docs]
def unslugify(slug: str, separator: str = "-") -> str:
"""
Convert a slug back to a normal string.
Args:
slug (str): The slug to be converted.
separator (str): The separator used in the slug (default is "-").
Returns:
str: The original string.
"""
slug = slug.replace(separator, " ") # Replace separator with spaces
slug = re.sub(r'([a-z])([A-Z])', r'\1 \2', slug) # Add space before uppercase letters
return slug.capitalize() # Capitalize the first letter of the string
[docs]
def is_valid_slug(slug: str, separator: str = "-") -> bool:
"""
Check if a string is a valid slug.
Args:
slug (str): The slug to be checked.
separator (str): The separator used in the slug (default is "-").
Returns:
bool: True if the string is a valid slug, False otherwise.
"""
pattern = r'^[a-z0-9' + re.escape(separator) + r']+$' # Allow lowercase letters, numbers, and separator
return bool(re.match(pattern, slug))
[docs]
def generate_slug_from_string(text: str, separator: str = "-") -> str:
"""
Generate a slug from a given string.
Args:
text (str): The input string.
separator (str): The separator to be used in the generated slug (default is "-").
Returns:
str: The generated slug.
"""
return slugify(text, separator)
[docs]
def clean_slug(slug: str, separator: str = "-") -> str:
"""
Clean up a slug by ensuring it's lowercase and properly formatted.
Args:
slug (str): The slug to clean.
separator (str): The separator used in the slug (default is "-").
Returns:
str: The cleaned slug.
"""
slug = slug.strip(separator) # Remove leading/trailing separators
slug = re.sub(r'[^a-z0-9' + re.escape(separator) + r']', '', slug) # Remove invalid characters
return slug.lower()
[docs]
def split_slug(slug: str, separator: str = "-") -> list:
"""
Split a slug into individual words.
Args:
slug (str): The slug to be split.
separator (str): The separator used in the slug (default is "-").
Returns:
list: The list of words in the slug.
"""
return slug.split(separator)
[docs]
def join_slug(words: list, separator: str = "-") -> str:
"""
Join a list of words into a slug.
Args:
words (list): The list of words to be joined.
separator (str): The separator to use between words (default is "-").
Returns:
str: The joined slug.
"""
return separator.join(words)
[docs]
def truncate_slug(slug: str, max_length: int, separator: str = "-") -> str:
"""
Truncate a slug to a specified maximum length.
Args:
slug (str): The slug to truncate.
max_length (int): The maximum allowed length of the slug.
separator (str): The separator used in the slug (default is "-").
Returns:
str: The truncated slug.
"""
if len(slug) <= max_length:
return slug
# Truncate the slug and ensure it doesn't cut in the middle of a word
truncated = slug[:max_length]
last_separator = truncated.rfind(separator)
if last_separator != -1:
return truncated[:last_separator]
return truncated
[docs]
def sanitize_slug(slug: str, separator: str = "-") -> str:
"""
Sanitize a slug to ensure it contains only valid characters.
Args:
slug (str): The slug to sanitize.
separator (str): The separator used in the slug (default is "-").
Returns:
str: The sanitized slug.
"""
slug = re.sub(r'[^a-z0-9' + re.escape(separator) + r']', '', slug) # Remove invalid characters
return slug.lower()