Source code for duck.http.mimes



[docs]
def guess_file_mimetype(filename: str) -> str:
    """
    Guess a file's MIME type from its name.

    The lookup is delegated to the standard `mimetypes` module, which maps
    the filename's extension to a type. The file itself is never opened or
    read by this function.

    Args:
        filename (str): The path or name of the file to classify.

    Returns:
        str: The guessed MIME type, or None when `filename` is empty or
        `mimetypes` has no mapping for it. No default type is substituted,
        so callers that need a fallback must supply their own.
    """
    import mimetypes

    # Nothing to look up for an empty/None filename.
    if not filename:
        return None

    # guess_type returns (type, encoding); only the type is of interest here.
    guessed_type, _encoding = mimetypes.guess_type(filename)
    return guessed_type




[docs]
def guess_data_mimetype(data: bytes) -> str:
    """
    Determine the MIME type of raw content by inspecting its leading bytes.

    Detection runs in three stages, most reliable first:

    1. Fixed magic numbers at offset 0 (JPEG, PNG, GIF, BMP, ICO, TIFF, PDF,
       legacy MS Office, ZIP, gzip, Ogg, MIDI, MP3).
    2. Container formats whose type is stored a few bytes in: RIFF
       (WebP, AVI) and ISO base media `ftyp` boxes (HEIF, MP4).
    3. Text heuristics applied to the first 500 bytes (HTML, JSON, XML,
       JavaScript, CSS), followed by a printable-ASCII check for plain text.

    Binary signatures are tested before the text heuristics so that media
    files carrying markup-like bytes in their metadata are not mistaken for
    HTML. The text heuristics are best-effort guesses, not a parser.

    Args:
        data (bytes): The input data for which the MIME type needs to be determined.

    Returns:
        str: The determined MIME type of the input data. Falls back to
        'application/octet-stream' when nothing matches. Empty input contains
        no non-text bytes and is therefore reported as 'text/plain'.
    """
    # How many leading bytes the text heuristics look at.
    sniff_length = 500

    # Stage 1: magic numbers anchored at offset 0. A tuple of prefixes means
    # "any of these"; bytes.startswith accepts both forms.
    magic_prefixes = (
        (b"\xFF\xD8", "image/jpeg"),
        (b"\x89PNG\r\n\x1A\n", "image/png"),
        ((b"GIF87a", b"GIF89a"), "image/gif"),
        (b"BM", "image/bmp"),
        (b"\x00\x00\x01\x00", "image/vnd.microsoft.icon"),
        ((b"II*\x00", b"MM\x00*"), "image/tiff"),  # little-/big-endian TIFF
        (b"%PDF-", "application/pdf"),
        # Compound-file header used by legacy .doc files.
        (b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "application/msword"),
        # Local file header, empty archive, spanned archive.
        ((b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"), "application/zip"),
        (b"\x1F\x8B", "application/gzip"),
        (b"OggS", "application/ogg"),
        (b"MThd", "audio/midi"),
        # ID3 tag header or a bare MPEG audio frame sync.
        ((b"ID3", b"\xFF\xFB"), "audio/mpeg"),
    )
    for prefix, mimetype in magic_prefixes:
        if data.startswith(prefix):
            return mimetype

    # Stage 2a: RIFF containers store their form type at bytes 8..11,
    # after the 4-byte "RIFF" tag and the 4-byte chunk size.
    if data.startswith(b"RIFF"):
        riff_form = data[8:12]
        if riff_form == b"WEBP":
            return "image/webp"
        if riff_form == b"AVI ":
            return "video/x-msvideo"

    # Stage 2b: ISO base media files start with a box whose size (bytes 0..3)
    # varies, so match the "ftyp" tag at bytes 4..7 instead of a fixed size.
    # The major brand at bytes 8..11 separates HEIF images from video.
    heif_brands = (
        b"heic", b"heix", b"hevc", b"hevx",
        b"heim", b"heis", b"hevm", b"hevs",
        b"mif1", b"msf1",
    )
    if data[4:8] == b"ftyp":
        if data[8:12] in heif_brands:
            return "image/heif"
        # Any other brand is treated as MP4 (an approximation: related
        # formats such as QuickTime share this container layout).
        return "video/mp4"

    # Stage 3: text heuristics on the leading bytes only.
    html_tags = (
        b"<html",
        b"<!DOCTYPE html",
        b"<head>",
        b"<body>",
        b"<title>",
        b"<h1>",
        b"<div>",
        b"<span>",
        b"<p>",
        b"<a ",
        b"<img ",
        b"<script",
        b"<style",
        b"<meta",
        b"<link",
        b"<form",
        b"<table>",
        b"<tr>",
        b"<td>",
        b"<th>",
        b"<ul>",
        b"<ol>",
        b"<li>",
        b"<header>",
        b"<footer>",
        b"<nav>",
        b"<section>",
        b"<article>",
        b"<aside>",
        b"<main>",
        b"<figure>",
        b"<figcaption>",
        b"<blockquote>",
        b"<pre>",
        b"<code>",
        b"<canvas>",
        b"<svg>",
        b"<br",
        b"<b>",
    )
    head = data[:sniff_length]

    if any(tag in head for tag in html_tags):
        return "text/html"
    if data.startswith(b"{") and data.rstrip().endswith(b"}"):
        return "application/json"
    if data.startswith(b"<"):
        return "application/xml"

    # Strong JavaScript signals go before the CSS test: almost every script
    # contains "{\n", which the CSS heuristic below would otherwise claim.
    if data.startswith(b"//") or b"function " in head:
        return "application/javascript"
    if (
        data.startswith((b"/*", b"@charset"))
        or b"{\n" in head
        or b"{\r\n" in head
    ):
        return "text/css"
    # Weak JavaScript signals stay after CSS, since selectors such as
    # ".tablet .nav {" contain "let ".
    if any(keyword in head for keyword in (b"var ", b"let ", b"const ")):
        return "application/javascript"

    # Printable ASCII plus tab, LF and CR only -> plain text.
    if all(32 <= byte <= 126 or byte in (9, 10, 13) for byte in data):
        return "text/plain"
    return "application/octet-stream"