<!DOCTYPE html>
|
||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="generator" content="quarto-1.6.33">
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||
|
||
<meta name="author" content="AI Tools Suite">
|
||
<meta name="dcterms.date" content="2024-12-23">
|
||
|
||
<title>Building a Privacy Scanner: A Step-by-Step Implementation Guide</title>
|
||
<style>
|
||
code{white-space: pre-wrap;}
|
||
span.smallcaps{font-variant: small-caps;}
|
||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||
div.column{flex: auto; overflow-x: auto;}
|
||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||
ul.task-list{list-style: none;}
|
||
ul.task-list li input[type="checkbox"] {
|
||
width: 0.8em;
|
||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||
vertical-align: middle;
|
||
}
|
||
/* CSS for syntax highlighting */
|
||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||
pre > code.sourceCode > span { line-height: 1.25; }
|
||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||
.sourceCode { overflow: visible; }
|
||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||
div.sourceCode { margin: 1em 0; }
|
||
pre.sourceCode { margin: 0; }
|
||
@media screen {
|
||
div.sourceCode { overflow: auto; }
|
||
}
|
||
@media print {
|
||
pre > code.sourceCode { white-space: pre-wrap; }
|
||
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
|
||
}
|
||
pre.numberSource code
|
||
{ counter-reset: source-line 0; }
|
||
pre.numberSource code > span
|
||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||
pre.numberSource code > span > a:first-child::before
|
||
{ content: counter(source-line);
|
||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||
border: none; display: inline-block;
|
||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||
-khtml-user-select: none; -moz-user-select: none;
|
||
-ms-user-select: none; user-select: none;
|
||
padding: 0 4px; width: 4em;
|
||
}
|
||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||
div.sourceCode
|
||
{ }
|
||
@media screen {
|
||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||
}
|
||
</style>
|
||
|
||
|
||
<script src="building-privacy-scanner_files/libs/clipboard/clipboard.min.js"></script>
|
||
<script src="building-privacy-scanner_files/libs/quarto-html/quarto.js"></script>
|
||
<script src="building-privacy-scanner_files/libs/quarto-html/popper.min.js"></script>
|
||
<script src="building-privacy-scanner_files/libs/quarto-html/tippy.umd.min.js"></script>
|
||
<script src="building-privacy-scanner_files/libs/quarto-html/anchor.min.js"></script>
|
||
<link href="building-privacy-scanner_files/libs/quarto-html/tippy.css" rel="stylesheet">
|
||
<link href="building-privacy-scanner_files/libs/quarto-html/quarto-syntax-highlighting-07ba0ad10f5680c660e360ac31d2f3b6.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||
<script src="building-privacy-scanner_files/libs/bootstrap/bootstrap.min.js"></script>
|
||
<link href="building-privacy-scanner_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||
<link href="building-privacy-scanner_files/libs/bootstrap/bootstrap-fe6593aca1dacbc749dc3d2ba78c8639.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
|
||
|
||
|
||
</head>
|
||
|
||
<body>
|
||
|
||
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
|
||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||
<nav id="TOC" role="doc-toc" class="toc-active">
|
||
<h2 id="toc-title">Table of contents</h2>
|
||
|
||
<ul>
|
||
<li><a href="#introduction" id="toc-introduction" class="nav-link active" data-scroll-target="#introduction">Introduction</a></li>
|
||
<li><a href="#step-1-project-structure" id="toc-step-1-project-structure" class="nav-link" data-scroll-target="#step-1-project-structure">Step 1: Project Structure</a></li>
|
||
<li><a href="#step-2-define-pii-patterns" id="toc-step-2-define-pii-patterns" class="nav-link" data-scroll-target="#step-2-define-pii-patterns">Step 2: Define PII Patterns</a></li>
|
||
<li><a href="#step-3-build-the-basic-detection-engine" id="toc-step-3-build-the-basic-detection-engine" class="nav-link" data-scroll-target="#step-3-build-the-basic-detection-engine">Step 3: Build the Basic Detection Engine</a></li>
|
||
<li><a href="#step-4-add-text-normalization-layer-2" id="toc-step-4-add-text-normalization-layer-2" class="nav-link" data-scroll-target="#step-4-add-text-normalization-layer-2">Step 4: Add Text Normalization (Layer 2)</a></li>
|
||
<li><a href="#step-5-implement-checksum-validation-layer-4" id="toc-step-5-implement-checksum-validation-layer-4" class="nav-link" data-scroll-target="#step-5-implement-checksum-validation-layer-4">Step 5: Implement Checksum Validation (Layer 4)</a></li>
|
||
<li><a href="#step-6-json-blob-extraction-layer-2.5" id="toc-step-6-json-blob-extraction-layer-2.5" class="nav-link" data-scroll-target="#step-6-json-blob-extraction-layer-2.5">Step 6: JSON Blob Extraction (Layer 2.5)</a></li>
|
||
<li><a href="#step-7-base64-auto-decoding-layer-2.6" id="toc-step-7-base64-auto-decoding-layer-2.6" class="nav-link" data-scroll-target="#step-7-base64-auto-decoding-layer-2.6">Step 7: Base64 Auto-Decoding (Layer 2.6)</a></li>
|
||
<li><a href="#step-8-build-the-fastapi-endpoint" id="toc-step-8-build-the-fastapi-endpoint" class="nav-link" data-scroll-target="#step-8-build-the-fastapi-endpoint">Step 8: Build the FastAPI Endpoint</a></li>
|
||
<li><a href="#step-9-create-the-sveltekit-frontend" id="toc-step-9-create-the-sveltekit-frontend" class="nav-link" data-scroll-target="#step-9-create-the-sveltekit-frontend">Step 9: Create the SvelteKit Frontend</a></li>
|
||
<li><a href="#step-10-add-security-features" id="toc-step-10-add-security-features" class="nav-link" data-scroll-target="#step-10-add-security-features">Step 10: Add Security Features</a></li>
|
||
<li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion">Conclusion</a></li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
<main class="content" id="quarto-document-content">
|
||
|
||
<header id="title-block-header" class="quarto-title-block default">
|
||
<div class="quarto-title">
|
||
<h1 class="title">Building a Privacy Scanner: A Step-by-Step Implementation Guide</h1>
|
||
<div class="quarto-categories">
|
||
<div class="quarto-category">tutorial</div>
|
||
<div class="quarto-category">privacy</div>
|
||
<div class="quarto-category">pii-detection</div>
|
||
<div class="quarto-category">python</div>
|
||
<div class="quarto-category">svelte</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="quarto-title-meta">
|
||
|
||
<div>
|
||
<div class="quarto-title-meta-heading">Author</div>
|
||
<div class="quarto-title-meta-contents">
|
||
<p>AI Tools Suite </p>
|
||
</div>
|
||
</div>
|
||
|
||
<div>
|
||
<div class="quarto-title-meta-heading">Published</div>
|
||
<div class="quarto-title-meta-contents">
|
||
<p class="date">December 23, 2024</p>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
</div>
|
||
|
||
|
||
|
||
</header>
|
||
|
||
|
||
<section id="introduction" class="level2">
|
||
<h2 class="anchored" data-anchor-id="introduction">Introduction</h2>
|
||
<p>In this tutorial, we’ll build a production-grade Privacy Scanner from scratch. By the end, you’ll have a tool that detects 40+ types of Personally Identifiable Information (PII) using an eight-layer detection pipeline, complete with a modern web interface.</p>
|
||
<p>Our stack: <strong>FastAPI</strong> for the backend API, <strong>SvelteKit</strong> for the frontend, and <strong>Python regex</strong> with validation logic for detection.</p>
|
||
</section>
|
||
<section id="step-1-project-structure" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-1-project-structure">Step 1: Project Structure</h2>
|
||
<p>First, create the project scaffolding:</p>
|
||
<div class="sourceCode" id="cb1"><pre class="sourceCode numberSource bash number-lines code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1"></a><span class="fu">mkdir</span> <span class="at">-p</span> ai_tools_suite/<span class="dt">{backend/routers</span><span class="op">,</span><span class="dt">frontend/src/routes/privacy-scanner}</span></span>
|
||
<span id="cb1-2"><a href="#cb1-2"></a><span class="bu">cd</span> ai_tools_suite</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<p>Your directory structure should look like:</p>
|
||
<pre><code>ai_tools_suite/
├── backend/
│   ├── main.py
│   └── routers/
│       └── privacy.py
└── frontend/
    └── src/
        └── routes/
            └── privacy-scanner/
                └── +page.svelte</code></pre>
|
||
</section>
|
||
<section id="step-2-define-pii-patterns" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-2-define-pii-patterns">Step 2: Define PII Patterns</h2>
|
||
<p>The foundation of any PII scanner is its pattern library. Create <code>backend/routers/privacy.py</code> and start with the core patterns:</p>
|
||
<div class="sourceCode" id="cb3"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1"></a><span class="im">import</span> re</span>
|
||
<span id="cb3-2"><a href="#cb3-2"></a><span class="im">from</span> typing <span class="im">import</span> List, Dict, Any</span>
|
||
<span id="cb3-3"><a href="#cb3-3"></a><span class="im">from</span> pydantic <span class="im">import</span> BaseModel</span>
|
||
<span id="cb3-4"><a href="#cb3-4"></a></span>
|
||
<span id="cb3-5"><a href="#cb3-5"></a><span class="kw">class</span> PIIEntity(BaseModel):</span>
|
||
<span id="cb3-6"><a href="#cb3-6"></a> <span class="bu">type</span>: <span class="bu">str</span></span>
|
||
<span id="cb3-7"><a href="#cb3-7"></a> value: <span class="bu">str</span></span>
|
||
<span id="cb3-8"><a href="#cb3-8"></a> start: <span class="bu">int</span></span>
|
||
<span id="cb3-9"><a href="#cb3-9"></a> end: <span class="bu">int</span></span>
|
||
<span id="cb3-10"><a href="#cb3-10"></a> confidence: <span class="bu">float</span></span>
|
||
<span id="cb3-11"><a href="#cb3-11"></a> context: <span class="bu">str</span> <span class="op">=</span> <span class="st">""</span></span>
|
||
<span id="cb3-12"><a href="#cb3-12"></a></span>
|
||
<span id="cb3-13"><a href="#cb3-13"></a>PII_PATTERNS <span class="op">=</span> {</span>
|
||
<span id="cb3-14"><a href="#cb3-14"></a> <span class="co"># Identity Documents</span></span>
|
||
<span id="cb3-15"><a href="#cb3-15"></a> <span class="st">"SSN"</span>: {</span>
|
||
<span id="cb3-16"><a href="#cb3-16"></a> <span class="st">"pattern"</span>: <span class="vs">r'\b\d</span><span class="sc">{3}</span><span class="vs">-\d</span><span class="sc">{2}</span><span class="vs">-\d</span><span class="sc">{4}</span><span class="vs">\b'</span>,</span>
|
||
<span id="cb3-17"><a href="#cb3-17"></a> <span class="st">"description"</span>: <span class="st">"US Social Security Number"</span>,</span>
|
||
<span id="cb3-18"><a href="#cb3-18"></a> <span class="st">"category"</span>: <span class="st">"identity"</span></span>
|
||
<span id="cb3-19"><a href="#cb3-19"></a> },</span>
|
||
<span id="cb3-20"><a href="#cb3-20"></a> <span class="st">"PASSPORT"</span>: {</span>
|
||
<span id="cb3-21"><a href="#cb3-21"></a> <span class="st">"pattern"</span>: <span class="vs">r'\b[A-Z]{1,2}\d{6,9}\b'</span>,</span>
|
||
<span id="cb3-22"><a href="#cb3-22"></a> <span class="st">"description"</span>: <span class="st">"Passport Number"</span>,</span>
|
||
<span id="cb3-23"><a href="#cb3-23"></a> <span class="st">"category"</span>: <span class="st">"identity"</span></span>
|
||
<span id="cb3-24"><a href="#cb3-24"></a> },</span>
|
||
<span id="cb3-25"><a href="#cb3-25"></a></span>
|
||
<span id="cb3-26"><a href="#cb3-26"></a> <span class="co"># Financial Information</span></span>
|
||
<span id="cb3-27"><a href="#cb3-27"></a> <span class="st">"CREDIT_CARD"</span>: {</span>
|
||
<span id="cb3-28"><a href="#cb3-28"></a> <span class="st">"pattern"</span>: <span class="vs">r'\b(?:4[0-9]</span><span class="sc">{12}</span><span class="vs">(?:[0-9]</span><span class="sc">{3}</span><span class="vs">)?|5[1-5][0-9]</span><span class="sc">{14}</span><span class="vs">|3[47][0-9]</span><span class="sc">{13}</span><span class="vs">)\b'</span>,</span>
|
||
<span id="cb3-29"><a href="#cb3-29"></a> <span class="st">"description"</span>: <span class="st">"Credit Card Number (Visa, MC, Amex)"</span>,</span>
|
||
<span id="cb3-30"><a href="#cb3-30"></a> <span class="st">"category"</span>: <span class="st">"financial"</span></span>
|
||
<span id="cb3-31"><a href="#cb3-31"></a> },</span>
|
||
<span id="cb3-32"><a href="#cb3-32"></a> <span class="st">"IBAN"</span>: {</span>
|
||
<span id="cb3-33"><a href="#cb3-33"></a> <span class="st">"pattern"</span>: <span class="vs">r'\b[A-Z]</span><span class="sc">{2}</span><span class="vs">\d</span><span class="sc">{2}</span><span class="vs">[A-Z0-9]{4,30}\b'</span>,</span>
|
||
<span id="cb3-34"><a href="#cb3-34"></a> <span class="st">"description"</span>: <span class="st">"International Bank Account Number"</span>,</span>
|
||
<span id="cb3-35"><a href="#cb3-35"></a> <span class="st">"category"</span>: <span class="st">"financial"</span></span>
|
||
<span id="cb3-36"><a href="#cb3-36"></a> },</span>
|
||
<span id="cb3-37"><a href="#cb3-37"></a></span>
|
||
<span id="cb3-38"><a href="#cb3-38"></a> <span class="co"># Contact Information</span></span>
|
||
<span id="cb3-39"><a href="#cb3-39"></a> <span class="st">"EMAIL"</span>: {</span>
|
||
<span id="cb3-40"><a href="#cb3-40"></a> <span class="st">"pattern"</span>: <span class="vs">r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'</span>,</span>
|
||
<span id="cb3-41"><a href="#cb3-41"></a> <span class="st">"description"</span>: <span class="st">"Email Address"</span>,</span>
|
||
<span id="cb3-42"><a href="#cb3-42"></a> <span class="st">"category"</span>: <span class="st">"contact"</span></span>
|
||
<span id="cb3-43"><a href="#cb3-43"></a> },</span>
|
||
<span id="cb3-44"><a href="#cb3-44"></a> <span class="st">"PHONE_US"</span>: {</span>
|
||
<span id="cb3-45"><a href="#cb3-45"></a> <span class="st">"pattern"</span>: <span class="vs">r'\b(?:\+1[-.\s]?)?\(?\d</span><span class="sc">{3}</span><span class="vs">\)?[-.\s]?\d</span><span class="sc">{3}</span><span class="vs">[-.\s]?\d</span><span class="sc">{4}</span><span class="vs">\b'</span>,</span>
|
||
<span id="cb3-46"><a href="#cb3-46"></a> <span class="st">"description"</span>: <span class="st">"US Phone Number"</span>,</span>
|
||
<span id="cb3-47"><a href="#cb3-47"></a> <span class="st">"category"</span>: <span class="st">"contact"</span></span>
|
||
<span id="cb3-48"><a href="#cb3-48"></a> },</span>
|
||
<span id="cb3-49"><a href="#cb3-49"></a></span>
|
||
<span id="cb3-50"><a href="#cb3-50"></a> <span class="co"># Add more patterns as needed...</span></span>
|
||
<span id="cb3-51"><a href="#cb3-51"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<p>Each pattern includes a regex, human-readable description, and category for risk classification.</p>
|
||
</section>
|
||
<section id="step-3-build-the-basic-detection-engine" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-3-build-the-basic-detection-engine">Step 3: Build the Basic Detection Engine</h2>
|
||
<p>Add the core detection function:</p>
|
||
<div class="sourceCode" id="cb4"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw">def</span> detect_pii_basic(text: <span class="bu">str</span>) <span class="op">-></span> List[PIIEntity]:</span>
|
||
<span id="cb4-2"><a href="#cb4-2"></a> <span class="co">"""Layer 1: Standard regex pattern matching."""</span></span>
|
||
<span id="cb4-3"><a href="#cb4-3"></a> entities <span class="op">=</span> []</span>
|
||
<span id="cb4-4"><a href="#cb4-4"></a></span>
|
||
<span id="cb4-5"><a href="#cb4-5"></a> <span class="cf">for</span> pii_type, config <span class="kw">in</span> PII_PATTERNS.items():</span>
|
||
<span id="cb4-6"><a href="#cb4-6"></a> pattern <span class="op">=</span> re.<span class="bu">compile</span>(config[<span class="st">"pattern"</span>], re.IGNORECASE)</span>
|
||
<span id="cb4-7"><a href="#cb4-7"></a></span>
|
||
<span id="cb4-8"><a href="#cb4-8"></a> <span class="cf">for</span> match <span class="kw">in</span> pattern.finditer(text):</span>
|
||
<span id="cb4-9"><a href="#cb4-9"></a> entity <span class="op">=</span> PIIEntity(</span>
|
||
<span id="cb4-10"><a href="#cb4-10"></a> <span class="bu">type</span><span class="op">=</span>pii_type,</span>
|
||
<span id="cb4-11"><a href="#cb4-11"></a> value<span class="op">=</span>match.group(),</span>
|
||
<span id="cb4-12"><a href="#cb4-12"></a> start<span class="op">=</span>match.start(),</span>
|
||
<span id="cb4-13"><a href="#cb4-13"></a> end<span class="op">=</span>match.end(),</span>
|
||
<span id="cb4-14"><a href="#cb4-14"></a> confidence<span class="op">=</span><span class="fl">0.8</span>, <span class="co"># Base confidence</span></span>
|
||
<span id="cb4-15"><a href="#cb4-15"></a> context<span class="op">=</span>text[<span class="bu">max</span>(<span class="dv">0</span>, match.start()<span class="op">-</span><span class="dv">20</span>):match.end()<span class="op">+</span><span class="dv">20</span>]</span>
|
||
<span id="cb4-16"><a href="#cb4-16"></a> )</span>
|
||
<span id="cb4-17"><a href="#cb4-17"></a> entities.append(entity)</span>
|
||
<span id="cb4-18"><a href="#cb4-18"></a></span>
|
||
<span id="cb4-19"><a href="#cb4-19"></a> <span class="cf">return</span> entities</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<p>This gives us working PII detection, but it’s easily fooled by obfuscation.</p>
|
||
</section>
|
||
<section id="step-4-add-text-normalization-layer-2" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-4-add-text-normalization-layer-2">Step 4: Add Text Normalization (Layer 2)</h2>
|
||
<p>Attackers often hide PII using separators, leetspeak, or Unicode tricks. Add a normalization pass (the version below handles separators and leetspeak):</p>
|
||
<div class="sourceCode" id="cb5"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1"></a><span class="kw">def</span> normalize_text(text: <span class="bu">str</span>) <span class="op">-></span> <span class="bu">tuple</span>[<span class="bu">str</span>, <span class="bu">dict</span>]:</span>
|
||
<span id="cb5-2"><a href="#cb5-2"></a> <span class="co">"""Layer 2: Remove obfuscation techniques."""</span></span>
|
||
<span id="cb5-3"><a href="#cb5-3"></a> original <span class="op">=</span> text</span>
|
||
<span id="cb5-4"><a href="#cb5-4"></a> mappings <span class="op">=</span> {}</span>
|
||
<span id="cb5-5"><a href="#cb5-5"></a></span>
|
||
<span id="cb5-6"><a href="#cb5-6"></a> <span class="co"># Remove common separators</span></span>
|
||
<span id="cb5-7"><a href="#cb5-7"></a> normalized <span class="op">=</span> re.sub(<span class="vs">r'[\s\-\.\(\)]+'</span>, <span class="st">''</span>, text)</span>
|
||
<span id="cb5-8"><a href="#cb5-8"></a></span>
|
||
<span id="cb5-9"><a href="#cb5-9"></a> <span class="co"># Leetspeak conversion</span></span>
|
||
<span id="cb5-10"><a href="#cb5-10"></a> leet_map <span class="op">=</span> {<span class="st">'0'</span>: <span class="st">'o'</span>, <span class="st">'1'</span>: <span class="st">'i'</span>, <span class="st">'3'</span>: <span class="st">'e'</span>, <span class="st">'4'</span>: <span class="st">'a'</span>, <span class="st">'5'</span>: <span class="st">'s'</span>, <span class="st">'7'</span>: <span class="st">'t'</span>}</span>
|
||
<span id="cb5-11"><a href="#cb5-11"></a> <span class="cf">for</span> leet, char <span class="kw">in</span> leet_map.items():</span>
|
||
<span id="cb5-12"><a href="#cb5-12"></a> normalized <span class="op">=</span> normalized.replace(leet, char)</span>
|
||
<span id="cb5-13"><a href="#cb5-13"></a></span>
|
||
<span id="cb5-14"><a href="#cb5-14"></a> <span class="co"># Track position mappings for accurate reporting</span></span>
|
||
<span id="cb5-15"><a href="#cb5-15"></a> <span class="co"># (simplified - production code needs full position tracking)</span></span>
|
||
<span id="cb5-16"><a href="#cb5-16"></a></span>
|
||
<span id="cb5-17"><a href="#cb5-17"></a> <span class="cf">return</span> normalized, mappings</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
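<p>Stripping the separators turns <code>4-5-6-7-8-9-0-1-2-3</code> into the contiguous run <code>4567890123</code>, which the <code>PHONE_US</code> pattern from Step 2 can match (ten digits is a phone-number length; an SSN has nine digits, and the <code>SSN</code> pattern additionally requires the hyphens). Note that the leetspeak step as written also rewrites the digits 0, 1, 3, 4, 5 and 7 into letters, so run digit-based patterns against the separator-stripped text before applying that conversion.</p>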
|
||
</section>
|
||
<section id="step-5-implement-checksum-validation-layer-4" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-5-implement-checksum-validation-layer-4">Step 5: Implement Checksum Validation (Layer 4)</h2>
|
||
<p>Not every number sequence is valid PII. Add validation logic:</p>
|
||
<div class="sourceCode" id="cb6"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1"></a><span class="kw">def</span> luhn_checksum(card_number: <span class="bu">str</span>) <span class="op">-></span> <span class="bu">bool</span>:</span>
|
||
<span id="cb6-2"><a href="#cb6-2"></a> <span class="co">"""Validate credit card using Luhn algorithm."""</span></span>
|
||
<span id="cb6-3"><a href="#cb6-3"></a> digits <span class="op">=</span> [<span class="bu">int</span>(d) <span class="cf">for</span> d <span class="kw">in</span> card_number <span class="cf">if</span> d.isdigit()]</span>
|
||
<span id="cb6-4"><a href="#cb6-4"></a> odd_digits <span class="op">=</span> digits[<span class="op">-</span><span class="dv">1</span>::<span class="op">-</span><span class="dv">2</span>]</span>
|
||
<span id="cb6-5"><a href="#cb6-5"></a> even_digits <span class="op">=</span> digits[<span class="op">-</span><span class="dv">2</span>::<span class="op">-</span><span class="dv">2</span>]</span>
|
||
<span id="cb6-6"><a href="#cb6-6"></a></span>
|
||
<span id="cb6-7"><a href="#cb6-7"></a> total <span class="op">=</span> <span class="bu">sum</span>(odd_digits)</span>
|
||
<span id="cb6-8"><a href="#cb6-8"></a> <span class="cf">for</span> d <span class="kw">in</span> even_digits:</span>
|
||
<span id="cb6-9"><a href="#cb6-9"></a> total <span class="op">+=</span> <span class="bu">sum</span>(<span class="bu">divmod</span>(d <span class="op">*</span> <span class="dv">2</span>, <span class="dv">10</span>))</span>
|
||
<span id="cb6-10"><a href="#cb6-10"></a></span>
|
||
<span id="cb6-11"><a href="#cb6-11"></a> <span class="cf">return</span> total <span class="op">%</span> <span class="dv">10</span> <span class="op">==</span> <span class="dv">0</span></span>
|
||
<span id="cb6-12"><a href="#cb6-12"></a></span>
|
||
<span id="cb6-13"><a href="#cb6-13"></a><span class="kw">def</span> validate_iban(iban: <span class="bu">str</span>) <span class="op">-></span> <span class="bu">bool</span>:</span>
|
||
<span id="cb6-14"><a href="#cb6-14"></a> <span class="co">"""Validate IBAN using MOD-97 algorithm."""</span></span>
|
||
<span id="cb6-15"><a href="#cb6-15"></a> iban <span class="op">=</span> iban.replace(<span class="st">' '</span>, <span class="st">''</span>).upper()</span>
|
||
<span id="cb6-16"><a href="#cb6-16"></a></span>
|
||
<span id="cb6-17"><a href="#cb6-17"></a> <span class="co"># Move first 4 chars to end</span></span>
|
||
<span id="cb6-18"><a href="#cb6-18"></a> rearranged <span class="op">=</span> iban[<span class="dv">4</span>:] <span class="op">+</span> iban[:<span class="dv">4</span>]</span>
|
||
<span id="cb6-19"><a href="#cb6-19"></a></span>
|
||
<span id="cb6-20"><a href="#cb6-20"></a> <span class="co"># Convert letters to numbers (A=10, B=11, etc.)</span></span>
|
||
<span id="cb6-21"><a href="#cb6-21"></a> numeric <span class="op">=</span> <span class="st">''</span></span>
|
||
<span id="cb6-22"><a href="#cb6-22"></a> <span class="cf">for</span> char <span class="kw">in</span> rearranged:</span>
|
||
<span id="cb6-23"><a href="#cb6-23"></a> <span class="cf">if</span> char.isdigit():</span>
|
||
<span id="cb6-24"><a href="#cb6-24"></a> numeric <span class="op">+=</span> char</span>
|
||
<span id="cb6-25"><a href="#cb6-25"></a> <span class="cf">else</span>:</span>
|
||
<span id="cb6-26"><a href="#cb6-26"></a> numeric <span class="op">+=</span> <span class="bu">str</span>(<span class="bu">ord</span>(char) <span class="op">-</span> <span class="dv">55</span>)</span>
|
||
<span id="cb6-27"><a href="#cb6-27"></a></span>
|
||
<span id="cb6-28"><a href="#cb6-28"></a> <span class="cf">return</span> <span class="bu">int</span>(numeric) <span class="op">%</span> <span class="dv">97</span> <span class="op">==</span> <span class="dv">1</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<p>With validation, we can boost confidence for valid numbers and flag invalid ones as <code>POSSIBLE_CARD_PATTERN</code>.</p>
|
||
</section>
|
||
<section id="step-6-json-blob-extraction-layer-2.5" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-6-json-blob-extraction-layer-2.5">Step 6: JSON Blob Extraction (Layer 2.5)</h2>
|
||
<p>PII often hides in JSON payloads within logs or messages:</p>
|
||
<div class="sourceCode" id="cb7"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1"></a><span class="im">import</span> json</span>
|
||
<span id="cb7-2"><a href="#cb7-2"></a></span>
|
||
<span id="cb7-3"><a href="#cb7-3"></a><span class="kw">def</span> extract_json_strings(text: <span class="bu">str</span>) <span class="op">-></span> <span class="bu">list</span>[<span class="bu">tuple</span>[<span class="bu">str</span>, <span class="bu">int</span>, <span class="bu">int</span>]]:</span>
|
||
<span id="cb7-4"><a href="#cb7-4"></a> <span class="co">"""Find and extract JSON objects from text."""</span></span>
|
||
<span id="cb7-5"><a href="#cb7-5"></a> json_objects <span class="op">=</span> []</span>
|
||
<span id="cb7-6"><a href="#cb7-6"></a></span>
|
||
<span id="cb7-7"><a href="#cb7-7"></a> <span class="co"># Find potential JSON starts</span></span>
|
||
<span id="cb7-8"><a href="#cb7-8"></a> <span class="cf">for</span> i, char <span class="kw">in</span> <span class="bu">enumerate</span>(text):</span>
|
||
<span id="cb7-9"><a href="#cb7-9"></a> <span class="cf">if</span> char <span class="op">==</span> <span class="st">'{'</span>:</span>
|
||
<span id="cb7-10"><a href="#cb7-10"></a> depth <span class="op">=</span> <span class="dv">0</span></span>
|
||
<span id="cb7-11"><a href="#cb7-11"></a> <span class="cf">for</span> j <span class="kw">in</span> <span class="bu">range</span>(i, <span class="bu">len</span>(text)):</span>
|
||
<span id="cb7-12"><a href="#cb7-12"></a> <span class="cf">if</span> text[j] <span class="op">==</span> <span class="st">'{'</span>:</span>
|
||
<span id="cb7-13"><a href="#cb7-13"></a> depth <span class="op">+=</span> <span class="dv">1</span></span>
|
||
<span id="cb7-14"><a href="#cb7-14"></a> <span class="cf">elif</span> text[j] <span class="op">==</span> <span class="st">'}'</span>:</span>
|
||
<span id="cb7-15"><a href="#cb7-15"></a> depth <span class="op">-=</span> <span class="dv">1</span></span>
|
||
<span id="cb7-16"><a href="#cb7-16"></a> <span class="cf">if</span> depth <span class="op">==</span> <span class="dv">0</span>:</span>
|
||
<span id="cb7-17"><a href="#cb7-17"></a> <span class="cf">try</span>:</span>
|
||
<span id="cb7-18"><a href="#cb7-18"></a> candidate <span class="op">=</span> text[i:j<span class="op">+</span><span class="dv">1</span>]</span>
|
||
<span id="cb7-19"><a href="#cb7-19"></a> json.loads(candidate) <span class="co"># Validate</span></span>
|
||
<span id="cb7-20"><a href="#cb7-20"></a> json_objects.append((candidate, i, j<span class="op">+</span><span class="dv">1</span>))</span>
|
||
<span id="cb7-21"><a href="#cb7-21"></a> <span class="cf">except</span> json.JSONDecodeError:</span>
|
||
<span id="cb7-22"><a href="#cb7-22"></a> <span class="cf">pass</span></span>
|
||
<span id="cb7-23"><a href="#cb7-23"></a> <span class="cf">break</span></span>
|
||
<span id="cb7-24"><a href="#cb7-24"></a></span>
|
||
<span id="cb7-25"><a href="#cb7-25"></a> <span class="cf">return</span> json_objects</span>
|
||
<span id="cb7-26"><a href="#cb7-26"></a></span>
|
||
<span id="cb7-27"><a href="#cb7-27"></a><span class="kw">def</span> deep_scan_json(json_str: <span class="bu">str</span>) <span class="op">-></span> <span class="bu">list</span>[<span class="bu">str</span>]:</span>
|
||
<span id="cb7-28"><a href="#cb7-28"></a> <span class="co">"""Recursively extract all string values from JSON."""</span></span>
|
||
<span id="cb7-29"><a href="#cb7-29"></a> values <span class="op">=</span> []</span>
|
||
<span id="cb7-30"><a href="#cb7-30"></a></span>
|
||
<span id="cb7-31"><a href="#cb7-31"></a> <span class="kw">def</span> extract(obj):</span>
|
||
<span id="cb7-32"><a href="#cb7-32"></a> <span class="cf">if</span> <span class="bu">isinstance</span>(obj, <span class="bu">str</span>):</span>
|
||
<span id="cb7-33"><a href="#cb7-33"></a> values.append(obj)</span>
|
||
<span id="cb7-34"><a href="#cb7-34"></a> <span class="cf">elif</span> <span class="bu">isinstance</span>(obj, <span class="bu">dict</span>):</span>
|
||
<span id="cb7-35"><a href="#cb7-35"></a> <span class="cf">for</span> v <span class="kw">in</span> obj.values():</span>
|
||
<span id="cb7-36"><a href="#cb7-36"></a> extract(v)</span>
|
||
<span id="cb7-37"><a href="#cb7-37"></a> <span class="cf">elif</span> <span class="bu">isinstance</span>(obj, <span class="bu">list</span>):</span>
|
||
<span id="cb7-38"><a href="#cb7-38"></a> <span class="cf">for</span> item <span class="kw">in</span> obj:</span>
|
||
<span id="cb7-39"><a href="#cb7-39"></a> extract(item)</span>
|
||
<span id="cb7-40"><a href="#cb7-40"></a></span>
|
||
<span id="cb7-41"><a href="#cb7-41"></a> <span class="cf">try</span>:</span>
|
||
<span id="cb7-42"><a href="#cb7-42"></a> extract(json.loads(json_str))</span>
|
||
<span id="cb7-43"><a href="#cb7-43"></a> <span class="cf">except</span>:</span>
|
||
<span id="cb7-44"><a href="#cb7-44"></a> <span class="cf">pass</span></span>
|
||
<span id="cb7-45"><a href="#cb7-45"></a></span>
|
||
<span id="cb7-46"><a href="#cb7-46"></a> <span class="cf">return</span> values</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
<section id="step-7-base64-auto-decoding-layer-2.6" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-7-base64-auto-decoding-layer-2.6">Step 7: Base64 Auto-Decoding (Layer 2.6)</h2>
|
||
<p>Encoded PII is common in API responses and logs:</p>
|
||
<div class="sourceCode" id="cb8"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1"></a><span class="im">import</span> base64</span>
|
||
<span id="cb8-2"><a href="#cb8-2"></a></span>
|
||
<span id="cb8-3"><a href="#cb8-3"></a><span class="kw">def</span> is_valid_base64(s: <span class="bu">str</span>) <span class="op">-></span> <span class="bu">bool</span>:</span>
|
||
<span id="cb8-4"><a href="#cb8-4"></a> <span class="co">"""Check if string is valid base64."""</span></span>
|
||
<span id="cb8-5"><a href="#cb8-5"></a> <span class="cf">if</span> <span class="bu">len</span>(s) <span class="op">&lt;</span> <span class="dv">20</span> <span class="kw">or</span> <span class="bu">len</span>(s) <span class="op">%</span> <span class="dv">4</span> <span class="op">!=</span> <span class="dv">0</span>:</span>
|
||
<span id="cb8-6"><a href="#cb8-6"></a> <span class="cf">return</span> <span class="va">False</span></span>
|
||
<span id="cb8-7"><a href="#cb8-7"></a> <span class="cf">try</span>:</span>
|
||
<span id="cb8-8"><a href="#cb8-8"></a> decoded <span class="op">=</span> base64.b64decode(s, validate<span class="op">=</span><span class="va">True</span>)</span>
|
||
<span id="cb8-9"><a href="#cb8-9"></a> decoded.decode(<span class="st">'utf-8'</span>) <span class="co"># Must be valid UTF-8</span></span>
|
||
<span id="cb8-10"><a href="#cb8-10"></a> <span class="cf">return</span> <span class="va">True</span></span>
|
||
<span id="cb8-11"><a href="#cb8-11"></a> <span class="cf">except</span>:</span>
|
||
<span id="cb8-12"><a href="#cb8-12"></a> <span class="cf">return</span> <span class="va">False</span></span>
|
||
<span id="cb8-13"><a href="#cb8-13"></a></span>
|
||
<span id="cb8-14"><a href="#cb8-14"></a><span class="kw">def</span> decode_base64_strings(text: <span class="bu">str</span>) <span class="op">-></span> <span class="bu">list</span>[<span class="bu">tuple</span>[<span class="bu">str</span>, <span class="bu">str</span>, <span class="bu">int</span>, <span class="bu">int</span>]]:</span>
|
||
<span id="cb8-15"><a href="#cb8-15"></a> <span class="co">"""Find and decode base64 strings."""</span></span>
|
||
<span id="cb8-16"><a href="#cb8-16"></a> results <span class="op">=</span> []</span>
|
||
<span id="cb8-17"><a href="#cb8-17"></a> pattern <span class="op">=</span> <span class="vs">r'[A-Za-z0-9+/]{20,}={0,2}'</span></span>
|
||
<span id="cb8-18"><a href="#cb8-18"></a></span>
|
||
<span id="cb8-19"><a href="#cb8-19"></a> <span class="cf">for</span> match <span class="kw">in</span> re.finditer(pattern, text):</span>
|
||
<span id="cb8-20"><a href="#cb8-20"></a> candidate <span class="op">=</span> match.group()</span>
|
||
<span id="cb8-21"><a href="#cb8-21"></a> <span class="cf">if</span> is_valid_base64(candidate):</span>
|
||
<span id="cb8-22"><a href="#cb8-22"></a> <span class="cf">try</span>:</span>
|
||
<span id="cb8-23"><a href="#cb8-23"></a> decoded <span class="op">=</span> base64.b64decode(candidate).decode(<span class="st">'utf-8'</span>)</span>
|
||
<span id="cb8-24"><a href="#cb8-24"></a> results.append((candidate, decoded, match.start(), match.end()))</span>
|
||
<span id="cb8-25"><a href="#cb8-25"></a> <span class="cf">except</span>:</span>
|
||
<span id="cb8-26"><a href="#cb8-26"></a> <span class="cf">pass</span></span>
|
||
<span id="cb8-27"><a href="#cb8-27"></a></span>
|
||
<span id="cb8-28"><a href="#cb8-28"></a> <span class="cf">return</span> results</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
<section id="step-8-build-the-fastapi-endpoint" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-8-build-the-fastapi-endpoint">Step 8: Build the FastAPI Endpoint</h2>
|
||
<p>Wire everything together in an API endpoint:</p>
|
||
<div class="sourceCode" id="cb9"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1"></a><span class="im">from</span> fastapi <span class="im">import</span> APIRouter, Form</span>
|
||
<span id="cb9-2"><a href="#cb9-2"></a></span>
|
||
<span id="cb9-3"><a href="#cb9-3"></a>router <span class="op">=</span> APIRouter(prefix<span class="op">=</span><span class="st">"/api/privacy"</span>, tags<span class="op">=</span>[<span class="st">"privacy"</span>])</span>
|
||
<span id="cb9-4"><a href="#cb9-4"></a></span>
|
||
<span id="cb9-5"><a href="#cb9-5"></a><span class="at">@router.post</span>(<span class="st">"/scan-text"</span>)</span>
|
||
<span id="cb9-6"><a href="#cb9-6"></a><span class="cf">async</span> <span class="kw">def</span> scan_text(</span>
|
||
<span id="cb9-7"><a href="#cb9-7"></a> text: <span class="bu">str</span> <span class="op">=</span> Form(...),</span>
|
||
<span id="cb9-8"><a href="#cb9-8"></a> sensitivity: <span class="bu">str</span> <span class="op">=</span> Form(<span class="st">"medium"</span>)</span>
|
||
<span id="cb9-9"><a href="#cb9-9"></a>):</span>
|
||
<span id="cb9-10"><a href="#cb9-10"></a> <span class="co">"""Main PII scanning endpoint."""</span></span>
|
||
<span id="cb9-11"><a href="#cb9-11"></a></span>
|
||
<span id="cb9-12"><a href="#cb9-12"></a> <span class="co"># Layer 1: Basic pattern matching</span></span>
|
||
<span id="cb9-13"><a href="#cb9-13"></a> entities <span class="op">=</span> detect_pii_basic(text)</span>
|
||
<span id="cb9-14"><a href="#cb9-14"></a></span>
|
||
<span id="cb9-15"><a href="#cb9-15"></a> <span class="co"># Layer 2: Normalized text scan</span></span>
|
||
<span id="cb9-16"><a href="#cb9-16"></a> normalized, mappings <span class="op">=</span> normalize_text(text)</span>
|
||
<span id="cb9-17"><a href="#cb9-17"></a> normalized_entities <span class="op">=</span> detect_pii_basic(normalized)</span>
|
||
<span id="cb9-18"><a href="#cb9-18"></a> <span class="co"># ... map positions back to original</span></span>
|
||
<span id="cb9-19"><a href="#cb9-19"></a></span>
|
||
<span id="cb9-20"><a href="#cb9-20"></a> <span class="co"># Layer 2.5: JSON extraction</span></span>
|
||
<span id="cb9-21"><a href="#cb9-21"></a> <span class="cf">for</span> json_str, start, end <span class="kw">in</span> extract_json_strings(text):</span>
|
||
<span id="cb9-22"><a href="#cb9-22"></a> <span class="cf">for</span> value <span class="kw">in</span> deep_scan_json(json_str):</span>
|
||
<span id="cb9-23"><a href="#cb9-23"></a> entities.extend(detect_pii_basic(value))</span>
|
||
<span id="cb9-24"><a href="#cb9-24"></a></span>
|
||
<span id="cb9-25"><a href="#cb9-25"></a> <span class="co"># Layer 2.6: Base64 decoding</span></span>
|
||
<span id="cb9-26"><a href="#cb9-26"></a> <span class="cf">for</span> original, decoded, start, end <span class="kw">in</span> decode_base64_strings(text):</span>
|
||
<span id="cb9-27"><a href="#cb9-27"></a> decoded_entities <span class="op">=</span> detect_pii_basic(decoded)</span>
|
||
<span id="cb9-28"><a href="#cb9-28"></a> <span class="cf">for</span> e <span class="kw">in</span> decoded_entities:</span>
|
||
<span id="cb9-29"><a href="#cb9-29"></a> e.<span class="bu">type</span> <span class="op">=</span> <span class="ss">f"</span><span class="sc">{</span>e<span class="sc">.</span><span class="bu">type</span><span class="sc">}</span><span class="ss">_BASE64_ENCODED"</span></span>
|
||
<span id="cb9-30"><a href="#cb9-30"></a> entities.extend(decoded_entities)</span>
|
||
<span id="cb9-31"><a href="#cb9-31"></a></span>
|
||
<span id="cb9-32"><a href="#cb9-32"></a> <span class="co"># Layer 4: Validation</span></span>
|
||
<span id="cb9-33"><a href="#cb9-33"></a> <span class="cf">for</span> entity <span class="kw">in</span> entities:</span>
|
||
<span id="cb9-34"><a href="#cb9-34"></a> <span class="cf">if</span> entity.<span class="bu">type</span> <span class="op">==</span> <span class="st">"CREDIT_CARD"</span>:</span>
|
||
<span id="cb9-35"><a href="#cb9-35"></a> <span class="cf">if</span> luhn_checksum(entity.value):</span>
|
||
<span id="cb9-36"><a href="#cb9-36"></a> entity.confidence <span class="op">=</span> <span class="fl">0.95</span></span>
|
||
<span id="cb9-37"><a href="#cb9-37"></a> <span class="cf">else</span>:</span>
|
||
<span id="cb9-38"><a href="#cb9-38"></a> entity.<span class="bu">type</span> <span class="op">=</span> <span class="st">"POSSIBLE_CARD_PATTERN"</span></span>
|
||
<span id="cb9-39"><a href="#cb9-39"></a> entity.confidence <span class="op">=</span> <span class="fl">0.5</span></span>
|
||
<span id="cb9-40"><a href="#cb9-40"></a></span>
|
||
<span id="cb9-41"><a href="#cb9-41"></a> <span class="co"># Deduplicate and sort</span></span>
|
||
<span id="cb9-42"><a href="#cb9-42"></a> entities <span class="op">=</span> deduplicate_entities(entities)</span>
|
||
<span id="cb9-43"><a href="#cb9-43"></a></span>
|
||
<span id="cb9-44"><a href="#cb9-44"></a> <span class="co"># Generate masked preview</span></span>
|
||
<span id="cb9-45"><a href="#cb9-45"></a> redacted <span class="op">=</span> mask_pii(text, entities)</span>
|
||
<span id="cb9-46"><a href="#cb9-46"></a></span>
|
||
<span id="cb9-47"><a href="#cb9-47"></a> <span class="cf">return</span> {</span>
|
||
<span id="cb9-48"><a href="#cb9-48"></a> <span class="st">"entities"</span>: [e.<span class="bu">dict</span>() <span class="cf">for</span> e <span class="kw">in</span> entities],</span>
|
||
<span id="cb9-49"><a href="#cb9-49"></a> <span class="st">"redacted_preview"</span>: redacted,</span>
|
||
<span id="cb9-50"><a href="#cb9-50"></a> <span class="st">"summary"</span>: generate_summary(entities)</span>
|
||
<span id="cb9-51"><a href="#cb9-51"></a> }</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
<section id="step-9-create-the-sveltekit-frontend" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-9-create-the-sveltekit-frontend">Step 9: Create the SvelteKit Frontend</h2>
|
||
<p>Build an interactive UI in <code>frontend/src/routes/privacy-scanner/+page.svelte</code>:</p>
|
||
<div class="sourceCode" id="cb10"><pre class="sourceCode numberSource svelte number-lines code-with-copy"><code class="sourceCode"><span id="cb10-1"><a href="#cb10-1"></a>&lt;script lang="ts"&gt;</span>
<span id="cb10-2"><a href="#cb10-2"></a>  let inputText = '';</span>
<span id="cb10-3"><a href="#cb10-3"></a>  let results: any = null;</span>
<span id="cb10-4"><a href="#cb10-4"></a>  let loading = false;</span>
<span id="cb10-5"><a href="#cb10-5"></a></span>
<span id="cb10-6"><a href="#cb10-6"></a>  async function scanText() {</span>
<span id="cb10-7"><a href="#cb10-7"></a>    loading = true;</span>
<span id="cb10-8"><a href="#cb10-8"></a>    const formData = new FormData();</span>
<span id="cb10-9"><a href="#cb10-9"></a>    formData.append('text', inputText);</span>
<span id="cb10-10"><a href="#cb10-10"></a></span>
<span id="cb10-11"><a href="#cb10-11"></a>    const response = await fetch('/api/privacy/scan-text', {</span>
<span id="cb10-12"><a href="#cb10-12"></a>      method: 'POST',</span>
<span id="cb10-13"><a href="#cb10-13"></a>      body: formData</span>
<span id="cb10-14"><a href="#cb10-14"></a>    });</span>
<span id="cb10-15"><a href="#cb10-15"></a></span>
<span id="cb10-16"><a href="#cb10-16"></a>    results = await response.json();</span>
<span id="cb10-17"><a href="#cb10-17"></a>    loading = false;</span>
<span id="cb10-18"><a href="#cb10-18"></a>  }</span>
<span id="cb10-19"><a href="#cb10-19"></a>&lt;/script&gt;</span>
<span id="cb10-20"><a href="#cb10-20"></a></span>
<span id="cb10-21"><a href="#cb10-21"></a>&lt;div class="container mx-auto p-6"&gt;</span>
<span id="cb10-22"><a href="#cb10-22"></a>  &lt;h1 class="text-2xl font-bold mb-4"&gt;Privacy Scanner&lt;/h1&gt;</span>
<span id="cb10-23"><a href="#cb10-23"></a></span>
<span id="cb10-24"><a href="#cb10-24"></a>  &lt;textarea</span>
<span id="cb10-25"><a href="#cb10-25"></a>    bind:value={inputText}</span>
<span id="cb10-26"><a href="#cb10-26"></a>    class="w-full h-48 p-4 border rounded"</span>
<span id="cb10-27"><a href="#cb10-27"></a>    placeholder="Paste text to scan for PII..."</span>
<span id="cb10-28"><a href="#cb10-28"></a>  &gt;&lt;/textarea&gt;</span>
<span id="cb10-29"><a href="#cb10-29"></a></span>
<span id="cb10-30"><a href="#cb10-30"></a>  &lt;button</span>
<span id="cb10-31"><a href="#cb10-31"></a>    on:click={scanText}</span>
<span id="cb10-32"><a href="#cb10-32"></a>    disabled={loading}</span>
<span id="cb10-33"><a href="#cb10-33"></a>    class="mt-4 px-6 py-2 bg-blue-600 text-white rounded"</span>
<span id="cb10-34"><a href="#cb10-34"></a>  &gt;</span>
<span id="cb10-35"><a href="#cb10-35"></a>    {loading ? 'Scanning...' : 'Scan for PII'}</span>
<span id="cb10-36"><a href="#cb10-36"></a>  &lt;/button&gt;</span>
<span id="cb10-37"><a href="#cb10-37"></a></span>
<span id="cb10-38"><a href="#cb10-38"></a>  {#if results}</span>
<span id="cb10-39"><a href="#cb10-39"></a>    &lt;div class="mt-6"&gt;</span>
<span id="cb10-40"><a href="#cb10-40"></a>      &lt;h2 class="text-xl font-semibold"&gt;Results&lt;/h2&gt;</span>
<span id="cb10-41"><a href="#cb10-41"></a></span>
<span id="cb10-42"><a href="#cb10-42"></a>      &lt;!-- Entity badges --&gt;</span>
<span id="cb10-43"><a href="#cb10-43"></a>      &lt;div class="flex flex-wrap gap-2 mt-4"&gt;</span>
<span id="cb10-44"><a href="#cb10-44"></a>        {#each results.entities as entity}</span>
<span id="cb10-45"><a href="#cb10-45"></a>          &lt;span class="px-3 py-1 rounded-full bg-red-100 text-red-800"&gt;</span>
<span id="cb10-46"><a href="#cb10-46"></a>            {entity.type}: {entity.value}</span>
<span id="cb10-47"><a href="#cb10-47"></a>          &lt;/span&gt;</span>
<span id="cb10-48"><a href="#cb10-48"></a>        {/each}</span>
<span id="cb10-49"><a href="#cb10-49"></a>      &lt;/div&gt;</span>
<span id="cb10-50"><a href="#cb10-50"></a></span>
<span id="cb10-51"><a href="#cb10-51"></a>      &lt;!-- Redacted preview --&gt;</span>
<span id="cb10-52"><a href="#cb10-52"></a>      &lt;div class="mt-4 p-4 bg-gray-100 rounded font-mono"&gt;</span>
<span id="cb10-53"><a href="#cb10-53"></a>        {results.redacted_preview}</span>
<span id="cb10-54"><a href="#cb10-54"></a>      &lt;/div&gt;</span>
<span id="cb10-55"><a href="#cb10-55"></a>    &lt;/div&gt;</span>
<span id="cb10-56"><a href="#cb10-56"></a>  {/if}</span>
<span id="cb10-57"><a href="#cb10-57"></a>&lt;/div&gt;</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
<section id="step-10-add-security-features" class="level2">
|
||
<h2 class="anchored" data-anchor-id="step-10-add-security-features">Step 10: Add Security Features</h2>
|
||
<p>For production deployment, keep processing ephemeral and make sure request bodies never reach the logs:</p>
|
||
<div class="sourceCode" id="cb11"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1"></a><span class="co"># In main.py - ensure no PII logging</span></span>
|
||
<span id="cb11-2"><a href="#cb11-2"></a><span class="im">import</span> logging</span>
|
||
<span id="cb11-3"><a href="#cb11-3"></a></span>
|
||
<span id="cb11-4"><a href="#cb11-4"></a><span class="kw">class</span> PIIFilter(logging.Filter):</span>
|
||
<span id="cb11-5"><a href="#cb11-5"></a> <span class="kw">def</span> <span class="bu">filter</span>(<span class="va">self</span>, record):</span>
|
||
<span id="cb11-6"><a href="#cb11-6"></a> <span class="co"># Never log request bodies that might contain PII</span></span>
|
||
<span id="cb11-7"><a href="#cb11-7"></a> <span class="cf">return</span> <span class="st">'text='</span> <span class="kw">not</span> <span class="kw">in</span> <span class="bu">str</span>(record.msg)</span>
|
||
<span id="cb11-8"><a href="#cb11-8"></a></span>
|
||
<span id="cb11-9"><a href="#cb11-9"></a>logging.getLogger().addFilter(PIIFilter())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<p>And add coordinates-only mode for ultra-sensitive clients:</p>
|
||
<div class="sourceCode" id="cb12"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1"></a><span class="at">@router.post</span>(<span class="st">"/scan-text"</span>)</span>
|
||
<span id="cb12-2"><a href="#cb12-2"></a><span class="cf">async</span> <span class="kw">def</span> scan_text(</span>
|
||
<span id="cb12-3"><a href="#cb12-3"></a> text: <span class="bu">str</span> <span class="op">=</span> Form(...),</span>
|
||
<span id="cb12-4"><a href="#cb12-4"></a> coordinates_only: <span class="bu">bool</span> <span class="op">=</span> Form(<span class="va">False</span>) <span class="co"># Client-side redaction mode</span></span>
|
||
<span id="cb12-5"><a href="#cb12-5"></a>):</span>
|
||
<span id="cb12-6"><a href="#cb12-6"></a> entities <span class="op">=</span> detect_pii_multilayer(text)</span>
|
||
<span id="cb12-7"><a href="#cb12-7"></a></span>
|
||
<span id="cb12-8"><a href="#cb12-8"></a> <span class="cf">if</span> coordinates_only:</span>
|
||
<span id="cb12-9"><a href="#cb12-9"></a> <span class="co"># Return only positions, not actual values</span></span>
|
||
<span id="cb12-10"><a href="#cb12-10"></a> <span class="cf">return</span> {</span>
|
||
<span id="cb12-11"><a href="#cb12-11"></a> <span class="st">"entities"</span>: [</span>
|
||
<span id="cb12-12"><a href="#cb12-12"></a> {<span class="st">"type"</span>: e.<span class="bu">type</span>, <span class="st">"start"</span>: e.start, <span class="st">"end"</span>: e.end, <span class="st">"length"</span>: e.end <span class="op">-</span> e.start}</span>
|
||
<span id="cb12-13"><a href="#cb12-13"></a> <span class="cf">for</span> e <span class="kw">in</span> entities</span>
|
||
<span id="cb12-14"><a href="#cb12-14"></a> ],</span>
|
||
<span id="cb12-15"><a href="#cb12-15"></a> <span class="st">"coordinates_only"</span>: <span class="va">True</span></span>
|
||
<span id="cb12-16"><a href="#cb12-16"></a> }</span>
|
||
<span id="cb12-17"><a href="#cb12-17"></a></span>
|
||
<span id="cb12-18"><a href="#cb12-18"></a> <span class="co"># Normal response with values</span></span>
|
||
<span id="cb12-19"><a href="#cb12-19"></a> <span class="cf">return</span> {<span class="st">"entities"</span>: [e.<span class="bu">dict</span>() <span class="cf">for</span> e <span class="kw">in</span> entities], ...}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
<section id="conclusion" class="level2">
|
||
<h2 class="anchored" data-anchor-id="conclusion">Conclusion</h2>
|
||
<p>You’ve now built a multi-layer Privacy Scanner that can:</p>
|
||
<ul>
|
||
<li>Detect 40+ PII types using regex patterns</li>
|
||
<li>Defeat obfuscation through text normalization</li>
|
||
<li>Extract PII from JSON payloads and Base64 encodings</li>
|
||
<li>Validate checksums to reduce false positives</li>
|
||
<li>Provide a clean web interface for interactive scanning</li>
|
||
<li>Operate in secure, coordinates-only mode</li>
|
||
</ul>
|
||
<p><strong>Next steps</strong> to enhance your scanner:</p>
|
||
<ol type="1">
|
||
<li>Add machine learning for name/address detection</li>
|
||
<li>Implement country- and region-specific patterns (EU VAT, UK NI numbers)</li>
|
||
<li>Build CI/CD integration for automated pre-commit scanning</li>
|
||
<li>Add PDF and document parsing capabilities</li>
|
||
</ol>
|
||
<p>The complete source code is available in the AI Tools Suite repository. Happy scanning!</p>
|
||
</section>
|
||
|
||
</main>
|
||
<!-- /main column -->
|
||
<script id="quarto-html-after-body" type="application/javascript">
|
||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||
/**
 * Mirror a stylesheet's colour mode onto the <body> element.
 *
 * Reads the `data-mode` attribute of the given element; "dark" selects the
 * `quarto-dark` body class, anything else selects `quarto-light`.
 *
 * @param {Element} bsSheetEl - Element whose `data-mode` attribute is read.
 */
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const body = window.document.querySelector("body");
  // Add the active class first, then drop the stale one.
  const [active, stale] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  body.classList.add(active);
  body.classList.remove(stale);
};
|
||
/**
 * Apply the body colour class for the primary Bootstrap stylesheet
 * (`link#quarto-bootstrap`). Does nothing when that link is absent.
 */
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
};
// Run once as soon as the handler executes.
toggleBodyColorPrimary();
|
||
const icon = "";
|
||
// Attach AnchorJS links to every element carrying the `anchored` class,
// placing the link on the right and using the `icon` string defined above.
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon: icon };
anchorJS.add('.anchored');
|
||
/**
 * Report whether an element carries any class starting with `code-annotation-`.
 *
 * @param {{classList: Iterable<string>}} el - Element to inspect.
 * @returns {boolean} true when at least one class name has that prefix.
 */
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((name) => name.startsWith('code-annotation-'));
|
||
/**
 * ClipboardJS "success" handler: give visual feedback on the copy button.
 *
 * Blurs the button, marks it as checked, swaps its title to "Copied!" and,
 * when Bootstrap is loaded, shows a manual tooltip. After one second the
 * tooltip (if any) is hidden and the button is restored. Finally the text
 * selection made by ClipboardJS is cleared.
 *
 * @param {{trigger: Element, clearSelection: Function}} e - ClipboardJS event.
 */
const onCopySuccess = function(e) {
  const copyButton = e.trigger;

  // Drop focus and flash the "checked" state.
  copyButton.blur();
  copyButton.classList.add('code-copy-button-checked');
  const previousTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", "Copied!");

  let tip;
  if (window.bootstrap) {
    const tooltipAttrs = [
      ["data-bs-toggle", "tooltip"],
      ["data-bs-placement", "left"],
      ["data-bs-title", "Copied!"],
    ];
    for (const [attrName, attrValue] of tooltipAttrs) {
      copyButton.setAttribute(attrName, attrValue);
    }
    tip = new bootstrap.Tooltip(copyButton, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8],
    });
    tip.show();
  }

  setTimeout(function() {
    if (tip) {
      tip.hide();
      ["data-bs-title", "data-bs-toggle", "data-bs-placement"].forEach((attrName) => {
        copyButton.removeAttribute(attrName);
      });
    }
    copyButton.setAttribute("title", previousTitle);
    copyButton.classList.remove('code-copy-button-checked');
  }, 1000);

  // Clear the code selection.
  e.clearSelection();
};
|
||
/**
 * Build the text placed on the clipboard for a copy button.
 *
 * Clones the element immediately preceding the button, removes any direct
 * child that is a code annotation, and returns the remaining text.
 *
 * @param {Element} trigger - The clicked copy button.
 * @returns {string} `innerText` of the cleaned clone.
 */
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // `children` is a live HTMLCollection: removing an element while iterating
  // it directly shifts the remaining items and skips the next sibling.
  // Iterate over a static snapshot instead so every annotation is removed.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
};
|
||
// Copy buttons outside the embedded-source modal.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);

// For code content inside modals, clipBoardJS needs to be initialized with a container option
// TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860)
const embeddedSourceModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: embeddedSourceModal
  });
  clipboardModal.on('success', onCopySuccess);
}
|
||
// Patterns used to decide whether a link target counts as "internal".
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
// The host is interpolated into a pattern, so escape regex metacharacters
// first; otherwise the dots in "docs.example.com" would match any character
// and unrelated hosts could be classified as internal.
var filterRegex = new RegExp(
  '/' + window.location.host.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '/'
);

/**
 * Report whether an href is internal: it contains `/<current host>/`,
 * points at localhost over http(s), or is a `mailto:` link.
 *
 * @param {string} href - Resolved link target.
 * @returns {boolean} true when the link should be treated as internal.
 */
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
};
|
||
// Walk every non-navigation link; for external ones, restore the href that was
// saved in `data-original-href` (undoing any rewrite done by quarto-nav.js).
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (isInternal(link.href)) {
    continue;
  }
  const savedHref = link.dataset.originalHref;
  if (savedHref !== undefined) {
    link.href = savedHref;
  }
}
|
||
/**
 * Attach a Quarto-themed tippy popup to an element.
 *
 * @param {Element} el - Element the popup is attached to.
 * @param {Function} [contentFn] - Stored as `content` when truthy.
 * @param {Function} [onTriggerFn] - Stored as `onTrigger` when truthy.
 * @param {Function} [onUntriggerFn] - Stored as `onUntrigger` when truthy.
 */
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // Mount the popup next to the reference element.
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  // Only copy over the callbacks that were actually supplied.
  const optionalEntries = [
    ['content', contentFn],
    ['onTrigger', onTriggerFn],
    ['onUntrigger', onUntriggerFn],
  ];
  for (const [key, callback] of optionalEntries) {
    if (callback) {
      config[key] = callback;
    }
  }
  window.tippy(el, config);
}
|
||
// Footnote references: show the footnote body in a hover popup.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
noterefs.forEach((ref) => {
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // Absolute URLs are reduced to their hash; anything unparsable is kept as is.
    try { href = new URL(href).hash; } catch {}
    const noteId = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(noteId);
    return note ? note.innerHTML : "";
  });
});
|
||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||
/**
 * Produce the HTML shown in a cross-reference hover popup.
 *
 * The `page-full` / `page-columns` classes are stripped from the whole
 * subtree first. For section targets (`id` is null or starts with `sec-`)
 * with more than two children, only the first child plus the first following
 * child that is not an empty paragraph are kept. Other targets have their
 * AnchorJS link removed; callouts are returned as outer HTML.
 *
 * @param {?string} id - Target id, or null for whole-page references.
 * @param {Element} note - Target element (mutated by this function).
 * @returns {string} HTML for the popup.
 */
const processXRef = (id, note) => {
  // Strip column container classes from the element and all descendants.
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (!el.children) {
      return;
    }
    for (const child of el.children) {
      stripColumnClz(child);
    }
  };
  // Typeset math when the Quarto helper is available.
  const typeset = (el) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(el);
    }
  };

  stripColumnClz(note);

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typeset(note);
    // TODO in 1.5, we should make sure this works without a callout special case
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const container = document.createElement("div");
  const kids = note.children;
  if (!(kids && kids.length > 2)) {
    typeset(note);
    return note.innerHTML;
  }

  container.appendChild(kids[0].cloneNode(true));
  for (let i = 1; i < kids.length; i++) {
    const child = kids[i];
    const isEmptyParagraph = child.tagName === "P" && child.innerText === "";
    if (isEmptyParagraph) {
      continue;
    }
    container.appendChild(child.cloneNode(true));
    break;
  }
  typeset(container);
  return container.innerHTML;
};
|
||
// Cross-reference links: resolve the target lazily when the popup is triggered.
for (const xref of xrefs) {
  tippyHover(xref, undefined, function(instance) {
    // Keep the popup disabled until its content has been resolved.
    instance.disable();
    const reveal = () => {
      instance.enable();
      instance.show();
    };
    // Fetch a page and hand back its text for parsing.
    const fetchPage = (target) => fetch(target).then(res => res.text());
    const parsePage = (html) => new DOMParser().parseFromString(html, "text/html");

    let url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      try { hash = new URL(url).hash; } catch {}
    }

    if (!hash) {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetchPage(url)
      .then(html => {
        const note = parsePage(html).querySelector('main.content');
        if (note !== null) {
          // This should only happen for chapter cross references
          // (since there is no id in the URL)
          // remove the first header
          if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
            note.children[0].remove();
          }
          instance.setContent(processXRef(null, note));
        }
      }).finally(reveal);
      return;
    }

    const id = hash.replace(/^#\/?/, "");
    const localNote = window.document.getElementById(id);
    if (localNote !== null) {
      // Target lives in this document: work on a clone so the page is untouched.
      try {
        instance.setContent(processXRef(id, localNote.cloneNode(true)));
      } finally {
        reveal();
      }
      return;
    }

    // See if we can fetch this
    fetchPage(url.split('#')[0])
    .then(html => {
      const remoteNote = parsePage(html).getElementById(id);
      if (remoteNote !== null) {
        instance.setContent(processXRef(id, remoteNote));
      }
    }).finally(reveal);
  }, function(instance) {
  });
}
|
||
let selectedAnnoteEl;
|
||
/**
 * Build the CSS selector for the span that marks one annotation in one cell.
 *
 * @param {string} cell - Value expected in `data-code-cell`.
 * @param {string} annotation - Value expected in `data-code-annotation`.
 * @returns {string} A `span[...][...]` attribute selector.
 */
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
|
||
/**
 * Highlight the code lines that belong to an annotation entry.
 *
 * Looks up the annotation span named by the entry's `data-target-cell` /
 * `data-target-annotation` attributes, reads its comma-separated
 * `data-code-lines`, and positions two absolutely-placed divs (one over the
 * code, one in the annotation gutter) so they span from the first to the last
 * listed line. On success the entry is remembered in `selectedAnnoteEl`.
 *
 * If the annotation span or the referenced line elements cannot be found the
 * function returns without changing anything instead of throwing.
 *
 * @param {Element} annoteEl - The annotation entry that was activated.
 */
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  if (annoteSpan === null) {
    return;
  }
  const codeLines = annoteSpan.getAttribute("data-code-lines");
  if (!codeLines) {
    return;
  }
  const lineIds = codeLines.split(",").map((line) => targetCell + "-" + line);

  // Compute the vertical extent from the first listed line to the last one.
  const firstEl = window.document.getElementById(lineIds[0]);
  const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
  if (firstEl === null || lastEl === null) {
    return;
  }
  const top = firstEl.offsetTop;
  const height = lineIds.length > 1
    ? lastEl.offsetTop + lastEl.offsetHeight - top
    : firstEl.offsetHeight;
  const parent = firstEl.parentElement ? firstEl.parentElement.parentElement : null;

  if (parent !== null) {
    // cook up a div (if necessary) and position it
    let div = window.document.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = window.document.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;

    let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      const codeCell = window.document.getElementById(targetCell);
      const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
      // Only create the gutter marker when there is a gutter to hold it.
      if (gutter) {
        gutterDiv = window.document.createElement("div");
        gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
        gutterDiv.style.position = 'absolute';
        gutter.appendChild(gutterDiv);
      }
    }
    if (gutterDiv !== null) {
      gutterDiv.style.top = top - 2 + "px";
      gutterDiv.style.height = height + 4 + "px";
    }
  }
  selectedAnnoteEl = annoteEl;
};
|
||
/**
 * Remove both highlight divs (code and gutter) if present and forget the
 * currently selected annotation entry.
 */
const unselectCodeLines = () => {
  for (const highlightId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    const highlight = window.document.getElementById(highlightId);
    if (highlight) {
      highlight.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
|
||
// Handle positioning of the toggle: when the window is resized, recompute the
// highlight for the currently selected annotation (throttled to 10 ms).
// The former `elRect = undefined` assignment was dropped: `elRect` is never
// declared or read in this script, so it only created an implicit global
// (and would throw a ReferenceError under strict mode).
window.addEventListener(
  "resize",
  throttle(() => {
    if (selectedAnnoteEl) {
      selectCodeLines(selectedAnnoteEl);
    }
  }, 10)
);
|
||
/**
 * Wrap `fn` so the first call runs immediately and later calls are delayed.
 *
 * While engaged, each new call cancels the pending one and schedules `fn`
 * with the latest arguments after `ms` milliseconds; once that delayed call
 * runs, the wrapper resets and the next call runs immediately again.
 *
 * @param {Function} fn - Function to invoke.
 * @param {number} ms - Delay applied to calls after the first.
 * @returns {Function} The throttled wrapper.
 */
function throttle(fn, ms) {
  let engaged = false;
  let timer;
  return (...args) => {
    if (!engaged) {
      // first call gets through
      fn.apply(this, args);
      engaged = true;
      return;
    }
    // all the others get throttled: only the most recent one survives
    if (timer) {
      clearTimeout(timer);
    }
    timer = setTimeout(() => {
      fn.apply(this, args);
      engaged = false;
      timer = false;
    }, ms);
  };
}
|
||
// Attach click handler to the DT: clicking an annotation entry selects its
// code lines; clicking the already-selected entry clears the selection.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
annoteDls.forEach((annoteDlNode) => {
  annoteDlNode.addEventListener('click', (event) => {
    const clickedEl = event.target;
    if (clickedEl === selectedAnnoteEl) {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
      return;
    }
    unselectCodeLines();
    const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
    if (activeEl) {
      activeEl.classList.remove('code-annotation-active');
    }
    selectCodeLines(clickedEl);
    clickedEl.classList.add('code-annotation-active');
  });
});
|
||
/**
 * Walk up from `el` to the nearest ancestor carrying a `data-cites` value.
 *
 * @param {Element} el - Starting element.
 * @returns {{el: Element, cites: string[]}|undefined} The child of the
 *   ancestor that holds the cites (the last element visited before it) and
 *   the space-separated cite keys, or undefined when no ancestor has any.
 */
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return { el: current, cites: cites.split(' ') };
    }
    current = current.parentElement;
  }
  return undefined;
};
|
||
// Bibliography references: show the cited entries in a hover popup.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, function() {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent');
      citeDiv.classList.add('csl-entry');
      // Copy the entry from the reference list when it exists on this page.
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  });
}
|
||
});
|
||
</script>
|
||
</div> <!-- /content -->
|
||
|
||
|
||
|
||
|
||
</body></html> |