ai-tools-suite/docs/privacy-scanner-overview.html
2025-12-27 15:33:06 +00:00

608 lines
No EOL
31 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.6.33">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="AI Tools Suite">
<meta name="dcterms.date" content="2024-12-23">
<title>Privacy Scanner: Multi-Layer PII Detection for Enterprise Data Protection</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
</style>
<script src="privacy-scanner-overview_files/libs/clipboard/clipboard.min.js"></script>
<script src="privacy-scanner-overview_files/libs/quarto-html/quarto.js"></script>
<script src="privacy-scanner-overview_files/libs/quarto-html/popper.min.js"></script>
<script src="privacy-scanner-overview_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="privacy-scanner-overview_files/libs/quarto-html/anchor.min.js"></script>
<link href="privacy-scanner-overview_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="privacy-scanner-overview_files/libs/quarto-html/quarto-syntax-highlighting-07ba0ad10f5680c660e360ac31d2f3b6.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="privacy-scanner-overview_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="privacy-scanner-overview_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="privacy-scanner-overview_files/libs/bootstrap/bootstrap-fe6593aca1dacbc749dc3d2ba78c8639.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
</head>
<body>
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#introduction" id="toc-introduction" class="nav-link active" data-scroll-target="#introduction">Introduction</a></li>
<li><a href="#the-challenge-of-modern-pii-detection" id="toc-the-challenge-of-modern-pii-detection" class="nav-link" data-scroll-target="#the-challenge-of-modern-pii-detection">The Challenge of Modern PII Detection</a></li>
<li><a href="#architecture-the-eight-layer-detection-pipeline" id="toc-architecture-the-eight-layer-detection-pipeline" class="nav-link" data-scroll-target="#architecture-the-eight-layer-detection-pipeline">Architecture: The Eight-Layer Detection Pipeline</a>
<ul class="collapse">
<li><a href="#layer-1-standard-regex-matching" id="toc-layer-1-standard-regex-matching" class="nav-link" data-scroll-target="#layer-1-standard-regex-matching">Layer 1: Standard Regex Matching</a></li>
<li><a href="#layer-2-text-normalization" id="toc-layer-2-text-normalization" class="nav-link" data-scroll-target="#layer-2-text-normalization">Layer 2: Text Normalization</a></li>
<li><a href="#layer-2.5-json-blob-extraction" id="toc-layer-2.5-json-blob-extraction" class="nav-link" data-scroll-target="#layer-2.5-json-blob-extraction">Layer 2.5: JSON Blob Extraction</a></li>
<li><a href="#layer-2.6-base64-auto-decoding" id="toc-layer-2.6-base64-auto-decoding" class="nav-link" data-scroll-target="#layer-2.6-base64-auto-decoding">Layer 2.6: Base64 Auto-Decoding</a></li>
<li><a href="#layer-2.7-spelled-out-number-detection" id="toc-layer-2.7-spelled-out-number-detection" class="nav-link" data-scroll-target="#layer-2.7-spelled-out-number-detection">Layer 2.7: Spelled-Out Number Detection</a></li>
<li><a href="#layer-2.8-non-latin-character-support" id="toc-layer-2.8-non-latin-character-support" class="nav-link" data-scroll-target="#layer-2.8-non-latin-character-support">Layer 2.8: Non-Latin Character Support</a></li>
<li><a href="#layer-3-context-based-confidence-scoring" id="toc-layer-3-context-based-confidence-scoring" class="nav-link" data-scroll-target="#layer-3-context-based-confidence-scoring">Layer 3: Context-Based Confidence Scoring</a></li>
<li><a href="#layer-4-checksum-verification" id="toc-layer-4-checksum-verification" class="nav-link" data-scroll-target="#layer-4-checksum-verification">Layer 4: Checksum Verification</a></li>
</ul></li>
<li><a href="#security-architecture" id="toc-security-architecture" class="nav-link" data-scroll-target="#security-architecture">Security Architecture</a></li>
<li><a href="#detection-categories" id="toc-detection-categories" class="nav-link" data-scroll-target="#detection-categories">Detection Categories</a></li>
<li><a href="#practical-applications" id="toc-practical-applications" class="nav-link" data-scroll-target="#practical-applications">Practical Applications</a></li>
<li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion">Conclusion</a></li>
</ul>
</nav>
</div>
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Privacy Scanner: Multi-Layer PII Detection for Enterprise Data Protection</h1>
<div class="quarto-categories">
<div class="quarto-category">privacy</div>
<div class="quarto-category">pii-detection</div>
<div class="quarto-category">data-protection</div>
<div class="quarto-category">compliance</div>
</div>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>AI Tools Suite </p>
</div>
</div>
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">December 23, 2024</p>
</div>
</div>
</div>
</header>
<section id="introduction" class="level2">
<h2 class="anchored" data-anchor-id="introduction">Introduction</h2>
<p>In an era where data breaches make headlines daily and privacy regulations like GDPR, CCPA, and HIPAA impose significant penalties for non-compliance, organizations need robust tools to identify and protect sensitive information. The <strong>Privacy Scanner</strong> is a production-grade PII (Personally Identifiable Information) detection system designed to help data teams, compliance officers, and developers identify sensitive data before it causes problems.</p>
<p>Unlike simple regex-based scanners that generate excessive false positives, the Privacy Scanner employs an eight-layer detection pipeline that balances precision with recall. It can detect not just obvious PII like email addresses and phone numbers, but also deliberately obfuscated data, encoded secrets, and international formats that simpler tools miss entirely.</p>
</section>
<section id="the-challenge-of-modern-pii-detection" class="level2">
<h2 class="anchored" data-anchor-id="the-challenge-of-modern-pii-detection">The Challenge of Modern PII Detection</h2>
<p>Traditional PII scanners face several limitations. They struggle with obfuscated data where users write “john [at] example [dot] com” to evade detection. They cannot decode Base64-encoded secrets hidden in configuration files. They miss spelled-out numbers like “nine zero zero dash twelve dash eight eight two one” that represent Social Security Numbers. And they fail entirely on non-Latin character sets, leaving Greek, Cyrillic, and other international data completely unscanned.</p>
<p>The Privacy Scanner addresses each of these challenges through its multi-layer architecture, processing text through successive detection stages that build upon each other.</p>
</section>
<section id="architecture-the-eight-layer-detection-pipeline" class="level2">
<h2 class="anchored" data-anchor-id="architecture-the-eight-layer-detection-pipeline">Architecture: The Eight-Layer Detection Pipeline</h2>
<section id="layer-1-standard-regex-matching" class="level3">
<h3 class="anchored" data-anchor-id="layer-1-standard-regex-matching">Layer 1: Standard Regex Matching</h3>
<p>The foundation layer applies over 40 carefully crafted regular expression patterns to identify common PII types. These patterns detect email addresses, phone numbers (US and international), Social Security Numbers, credit card numbers, IP addresses, physical addresses, IBANs, and cloud provider secrets from AWS, Azure, GCP, GitHub, and Stripe.</p>
<p>Each pattern is designed for specificity. For example, the SSN pattern requires explicit separators (dashes, dots, or spaces) to avoid matching random nine-digit sequences. Credit card patterns validate against known issuer prefixes before flagging potential matches.</p>
</section>
<section id="layer-2-text-normalization" class="level3">
<h3 class="anchored" data-anchor-id="layer-2-text-normalization">Layer 2: Text Normalization</h3>
<p>This layer transforms obfuscated text back to its canonical form. It converts “[dot]” and “(dot)” to periods, “[at]” and “(at)” to @ symbols, and removes separators from numeric sequences. Spaced-out characters like “t-e-s-t” are joined back together. After normalization, Layer 1 patterns are re-applied to catch previously hidden PII.</p>
</section>
<section id="layer-2.5-json-blob-extraction" class="level3">
<h3 class="anchored" data-anchor-id="layer-2.5-json-blob-extraction">Layer 2.5: JSON Blob Extraction</h3>
<p>Modern applications frequently embed data within JSON structures. This layer extracts JSON objects from text, recursively traverses their contents, and scans each string value for PII. A Stripe API key buried three levels deep in a JSON configuration will be detected and flagged as <code>STRIPE_KEY_IN_JSON</code>.</p>
</section>
<section id="layer-2.6-base64-auto-decoding" class="level3">
<h3 class="anchored" data-anchor-id="layer-2.6-base64-auto-decoding">Layer 2.6: Base64 Auto-Decoding</h3>
<p>Base64 encoding is commonly used to hide secrets in configuration files and environment variables. This layer identifies potential Base64 strings, decodes them, validates that the decoded content appears to be meaningful text, and scans the result for PII. An encoded password like <code>U2VjcmV0IFBhc3N3b3JkOiBBZG1pbiExMjM0NQ==</code> will be decoded and the contained password detected.</p>
</section>
<section id="layer-2.7-spelled-out-number-detection" class="level3">
<h3 class="anchored" data-anchor-id="layer-2.7-spelled-out-number-detection">Layer 2.7: Spelled-Out Number Detection</h3>
<p>This NLP-lite layer converts written numbers to digits. The phrase “nine zero zero dash twelve dash eight eight two one” becomes “900-12-8821”, which is then checked against SSN and other numeric patterns. This catches attempts to evade detection by spelling out sensitive numbers.</p>
</section>
<section id="layer-2.8-non-latin-character-support" class="level3">
<h3 class="anchored" data-anchor-id="layer-2.8-non-latin-character-support">Layer 2.8: Non-Latin Character Support</h3>
<p>For international data, this layer transliterates Greek and Cyrillic characters to Latin equivalents before scanning. It also directly detects EU VAT numbers across all 27 member states using country-specific patterns. A Greek customer record with “EL123456789” as a VAT number will be properly identified.</p>
</section>
<section id="layer-3-context-based-confidence-scoring" class="level3">
<h3 class="anchored" data-anchor-id="layer-3-context-based-confidence-scoring">Layer 3: Context-Based Confidence Scoring</h3>
<p>Raw pattern matches are adjusted based on surrounding context. Keywords like “ssn”, “social security”, or “card number” boost confidence scores. Anti-context keywords like “test”, “example”, or “batch” reduce confidence. Future dates are penalized when detected as potential birth dates since people cannot be born in the future.</p>
</section>
<section id="layer-4-checksum-verification" class="level3">
<h3 class="anchored" data-anchor-id="layer-4-checksum-verification">Layer 4: Checksum Verification</h3>
<p>The final layer validates detected patterns using mathematical checksums. Credit card numbers are verified using the Luhn algorithm. IBANs are validated using the MOD-97 checksum. Numbers that fail validation are either discarded or reclassified as “POSSIBLE_CARD_PATTERN” with reduced confidence, dramatically reducing false positives.</p>
</section>
</section>
<section id="security-architecture" class="level2">
<h2 class="anchored" data-anchor-id="security-architecture">Security Architecture</h2>
<p>The Privacy Scanner implements privacy-by-design principles throughout its architecture.</p>
<p><strong>Ephemeral Processing</strong>: All data processing occurs in memory using DuckDB’s <code>:memory:</code> mode. No PII is ever written to persistent storage or log files. Temporary files used for CSV parsing are immediately deleted after processing.</p>
<p><strong>Client-Side Redaction Mode</strong>: For ultra-sensitive deployments, the scanner offers a coordinates-only mode. In this configuration, the backend returns only the positions (start, end) and types of detected PII without the actual values. The frontend then performs masking locally, ensuring that sensitive data never leaves the user’s browser in its raw form.</p>
</section>
<section id="detection-categories" class="level2">
<h2 class="anchored" data-anchor-id="detection-categories">Detection Categories</h2>
<p>The scanner organizes detected entities into severity-weighted categories:</p>
<ul>
<li><strong>Critical (Score 95-100)</strong>: SSN, Credit Cards, Private Keys, AWS/Azure/GCP credentials</li>
<li><strong>High (Score 80-94)</strong>: GitHub tokens, Stripe keys, passwords, Medicare IDs</li>
<li><strong>Medium (Score 50-79)</strong>: IBAN, addresses, medical record numbers, EU VAT numbers</li>
<li><strong>Low (Score 20-49)</strong>: Email addresses, phone numbers, IP addresses, dates</li>
</ul>
<p>Risk scores aggregate these weights with confidence levels to produce an overall assessment ranging from LOW to CRITICAL.</p>
</section>
<section id="practical-applications" class="level2">
<h2 class="anchored" data-anchor-id="practical-applications">Practical Applications</h2>
<p><strong>Pre-Release Data Validation</strong>: Before sharing datasets with partners or publishing to data marketplaces, scan for inadvertent PII inclusion.</p>
<p><strong>Log File Auditing</strong>: Scan application logs, error messages, and debug output for accidentally logged credentials or customer data.</p>
<p><strong>Document Review</strong>: Check contracts, reports, and documentation for sensitive information before distribution.</p>
<p><strong>Compliance Reporting</strong>: Generate evidence of PII detection capabilities for GDPR, CCPA, or HIPAA audit requirements.</p>
<p><strong>Developer Tooling</strong>: Integrate into CI/CD pipelines to catch secrets committed to version control.</p>
</section>
<section id="conclusion" class="level2">
<h2 class="anchored" data-anchor-id="conclusion">Conclusion</h2>
<p>The Privacy Scanner represents a significant advancement over traditional pattern-matching approaches to PII detection. Its eight-layer architecture handles real-world data complexity including obfuscation, encoding, internationalization, and contextual ambiguity. Combined with privacy-preserving processing modes and comprehensive detection coverage, it provides organizations with a practical tool for managing sensitive data risk.</p>
<p>Whether you are a data engineer preparing datasets for machine learning, a compliance officer auditing data flows, or a developer building privacy-aware applications, the Privacy Scanner offers the depth of detection and operational flexibility needed for production environments.</p>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Reflect the Bootstrap stylesheet's colour mode onto <body>.
// `bsSheetEl` is the stylesheet element; its data-mode attribute decides
// which of the two marker classes (quarto-dark / quarto-light) is applied.
// Any value other than "dark" is treated as light.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const body = window.document.querySelector("body");
  const [classToAdd, classToDrop] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  body.classList.add(classToAdd);
  body.classList.remove(classToDrop);
};
// Apply the colour mode of the primary Bootstrap stylesheet
// (link#quarto-bootstrap) to <body>; does nothing when that link is absent.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
};
// Run once as soon as the DOM is ready.
toggleBodyColorPrimary();
const icon = "";
// Add AnchorJS links to every element carrying the "anchored" class,
// placed to the right of the heading and using the `icon` string defined above.
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon };
anchorJS.add('.anchored');
// True when any class on `el` starts with "code-annotation-".
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((cls) => cls.startsWith('code-annotation-'));
// Clipboard.js "success" handler for the code copy buttons.
// `e.trigger` is the button that was clicked. The button is flashed with the
// 'code-copy-button-checked' class and a "Copied!" title (plus a Bootstrap
// tooltip when window.bootstrap is available); everything is reverted after
// one second. Finally the text selection made by Clipboard.js is cleared.
const onCopySuccess = function(e) {
  // button target
  const button = e.trigger;
  // don't keep focus
  button.blur();
  // flash "checked"
  button.classList.add('code-copy-button-checked');
  // getAttribute yields null when the button has no title attribute
  const previousTitle = button.getAttribute("title");
  button.setAttribute("title", "Copied!");
  let tooltip;
  if (window.bootstrap) {
    button.setAttribute("data-bs-toggle", "tooltip");
    button.setAttribute("data-bs-placement", "left");
    button.setAttribute("data-bs-title", "Copied!");
    tooltip = new bootstrap.Tooltip(button,
      { trigger: "manual",
        customClass: "code-copy-button-tooltip",
        offset: [0, -8]});
    tooltip.show();
  }
  setTimeout(function() {
    if (tooltip) {
      tooltip.hide();
      button.removeAttribute("data-bs-title");
      button.removeAttribute("data-bs-toggle");
      button.removeAttribute("data-bs-placement");
    }
    // Restore the original title. Writing null back through setAttribute
    // would stringify it to "null", so drop the attribute instead when the
    // button had none to begin with.
    if (previousTitle === null) {
      button.removeAttribute("title");
    } else {
      button.setAttribute("title", previousTitle);
    }
    button.classList.remove('code-copy-button-checked');
  }, 1000);
  // clear code selection
  e.clearSelection();
};
// Clipboard.js `text` callback: returns the text to copy for a copy button.
// The element immediately before the button is cloned, direct children that
// are code annotations (per isCodeAnnotation) are removed from the clone, and
// the clone's innerText is returned. The live document is not modified.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // `children` is a live collection: removing an element while iterating it
  // directly shifts the remaining items and skips the next sibling. Iterate
  // over a static snapshot so every annotation child is visited.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
};
// Wire Clipboard.js to every copy button that is not inside the embedded
// source-code modal.
const clipboard = new window.ClipboardJS(
  '.code-copy-button:not([data-in-quarto-modal])',
  { text: getTextToCopy }
);
clipboard.on('success', onCopySuccess);
const embeddedSourceModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModal) {
  // For code content inside modals, clipBoardJS needs to be initialized with a container option
  // TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860)
  const clipboardModal = new window.ClipboardJS(
    '.code-copy-button[data-in-quarto-modal]',
    { text: getTextToCopy, container: embeddedSourceModal }
  );
  clipboardModal.on('success', onCopySuccess);
}
// A link counts as internal when its href contains "/<current host>/",
// points at localhost (optionally with a port), or is a mailto: link.
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
var filterRegex = new RegExp('/' + window.location.host + '/');
var isInternal = (href) =>
  filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (isInternal(link.href)) {
    continue;
  }
  // undo the damage that might have been done by quarto-nav.js in the case of
  // links that we want to consider external
  const originalHref = link.dataset.originalHref;
  if (originalHref !== undefined) {
    link.href = originalHref;
  }
}
// Attach a tippy tooltip to `el` using the shared Quarto configuration.
// contentFn, onTriggerFn and onUntriggerFn are optional; each is added to
// the configuration (as content / onTrigger / onUntrigger) only when truthy.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // render the tooltip inside the parent of the element it is given
    appendTo: (target) => target.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalEntries = [
    ['content', contentFn],
    ['onTrigger', onTriggerFn],
    ['onUntrigger', onUntriggerFn],
  ];
  for (const [key, value] of optionalEntries) {
    if (value) {
      config[key] = value;
    }
  }
  window.tippy(el, config);
}
// Footnote references: show the referenced note's HTML in a hover tooltip.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  const footnoteContent = function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // When href parses as an absolute URL keep only its fragment; otherwise
    // leave it as written.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    return note ? note.innerHTML : "";
  };
  tippyHover(ref, footnoteContent);
}
// All cross-reference links in the document.
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Build the tooltip HTML for a cross-reference target.
//   id   - id of the target element, or null when there is no id
//   note - the target element; it is mutated here (classes stripped, anchor
//          link removed), and the callers below pass either a clone or a
//          node from a separately parsed document
// Returns an HTML string.
const processXRef = (id, note) => {
// Strip column container classes
// (applied recursively to `el` and all of its descendants)
const stripColumnClz = (el) => {
el.classList.remove("page-full", "page-columns");
if (el.children) {
for (const child of el.children) {
stripColumnClz(child);
}
}
}
stripColumnClz(note)
if (id === null || id.startsWith('sec-')) {
// Special case sections, only their first couple elements
const container = document.createElement("div");
if (note.children && note.children.length > 2) {
// Keep the first child, then the first following child that is not an
// empty <p>, and stop there.
container.appendChild(note.children[0].cloneNode(true));
for (let i = 1; i < note.children.length; i++) {
const child = note.children[i];
if (child.tagName === "P" && child.innerText === "") {
continue;
} else {
container.appendChild(child.cloneNode(true));
break;
}
}
// Typeset math only when window.Quarto provides typesetMath.
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(container);
}
return container.innerHTML
} else {
// Two or fewer children: use the whole note.
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
return note.innerHTML;
}
} else {
// Remove any anchor links if they are present
// (only the first a.anchorjs-link match is removed)
const anchorLink = note.querySelector('a.anchorjs-link');
if (anchorLink) {
anchorLink.remove();
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
// TODO in 1.5, we should make sure this works without a callout special case
// Callouts return outerHTML so the callout wrapper element itself is included.
if (note.classList.contains("callout")) {
return note.outerHTML;
} else {
return note.innerHTML;
}
}
}
// Attach a hover tooltip to every cross-reference link. No content function
// is passed; the content is resolved in the onTrigger callback and pushed
// into the tippy instance with setContent.
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
// Disable the instance while the content is resolved; every path below
// re-enables it and calls show() from a finally block/handler.
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
// If url does not parse as an absolute URL, hash stays undefined.
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
// Target is in this document: process a clone so the page is not mutated.
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
// (the part of url before '#' is fetched and parsed, then the id is looked up there)
// NOTE(review): this chain has no .catch, so a failed fetch presumably
// surfaces as an unhandled rejection — confirm that is acceptable.
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
// NOTE(review): as above, no .catch on this chain — confirm.
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
// A null id makes processXRef take its section branch.
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
}, function(instance) {
// onUntrigger: intentionally empty
});
}
// The annotation <dt> whose code lines are currently highlighted, if any.
let selectedAnnoteEl;
// Build the CSS selector for the <span> that belongs to the given code cell
// and annotation number.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
// Highlight the code lines that belong to an annotation.
//   annoteEl - element carrying data-target-cell and data-target-annotation
// The matching span's data-code-lines attribute (comma-separated) is turned
// into element ids of the form "<targetCell>-<line>"; absolutely positioned
// highlight divs are then created (if missing) and sized to span from the
// first to the last of those lines. Records annoteEl in selectedAnnoteEl.
const selectCodeLines = (annoteEl) => {
// `doc` is not referenced again in this function.
const doc = window.document;
const targetCell = annoteEl.getAttribute("data-target-cell");
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
// NOTE(review): annoteSpan is dereferenced without a null check, so a
// missing span would throw here — confirm the markup always provides one.
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
const lineIds = lines.map((line) => {
return targetCell + "-" + line;
})
let top = null;
let height = null;
let parent = null;
// split() always yields at least one entry, so this branch is always taken.
if (lineIds.length > 0) {
//compute the position of the single el (top and bottom and make a div)
const el = window.document.getElementById(lineIds[0]);
top = el.offsetTop;
height = el.offsetHeight;
// the highlight div is appended to the grandparent of the first line element
parent = el.parentElement.parentElement;
if (lineIds.length > 1) {
// Several lines: stretch the height down to the bottom of the last line.
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
height = bottom - top;
}
if (top !== null && height !== null && parent !== null) {
// cook up a div (if necessary) and position it
let div = window.document.getElementById("code-annotation-line-highlight");
if (div === null) {
div = window.document.createElement("div");
div.setAttribute("id", "code-annotation-line-highlight");
div.style.position = 'absolute';
parent.appendChild(div);
}
// 2px of padding above and below the measured lines
div.style.top = top - 2 + "px";
div.style.height = height + 4 + "px";
div.style.left = 0;
// Same treatment for the gutter highlight, which lives inside the code
// cell's .code-annotation-gutter element.
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
if (gutterDiv === null) {
gutterDiv = window.document.createElement("div");
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
gutterDiv.style.position = 'absolute';
const codeCell = window.document.getElementById(targetCell);
const gutter = codeCell.querySelector('.code-annotation-gutter');
gutter.appendChild(gutterDiv);
}
gutterDiv.style.top = top - 2 + "px";
gutterDiv.style.height = height + 4 + "px";
}
selectedAnnoteEl = annoteEl;
}
};
// Remove both highlight overlays (line and gutter) if they exist and forget
// the currently selected annotation.
const unselectCodeLines = () => {
  const highlightIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  for (const highlightId of highlightIds) {
    window.document.getElementById(highlightId)?.remove();
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
// On window resize, re-run selectCodeLines for the currently selected
// annotation (if any) so the highlight overlays are repositioned. The
// handler is rate-limited through throttle() with a 10 ms window.
// The former `elRect = undefined` statement was dropped: it assigned to an
// identifier that is never declared or read in this script, which creates an
// accidental global in sloppy mode and throws in strict mode.
window.addEventListener(
  "resize",
  throttle(() => {
    if (selectedAnnoteEl) {
      selectCodeLines(selectedAnnoteEl);
    }
  }, 10)
);
// Rate-limit `fn`.
// The first call of the returned wrapper runs `fn` immediately. While the
// limiter is engaged, each further call cancels any pending timer and
// schedules `fn` (with that call's arguments) to run after `ms`
// milliseconds; when that delayed call runs, the limiter is released so the
// next call goes straight through again.
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (engaged) {
      // all the others get throttled
      if (pending) {
        clearTimeout(pending); // cancel #2
      }
      pending = setTimeout(() => {
        fn.apply(this, args);
        engaged = false;
        pending = false;
      }, ms);
      return;
    }
    // first call gets through
    fn.apply(this, args);
    engaged = true;
  };
}
// Attach click handler to the DT
// Clicking an annotation <dt> selects it (highlighting its code lines and
// marking it with 'code-annotation-active'); clicking the already selected
// one clears the selection.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', (event) => {
    // Use currentTarget (the <dt> the listener is bound to) rather than
    // target: a click that lands on a descendant of the <dt> would otherwise
    // hand selectCodeLines an element without the data-target-* attributes.
    const clickedEl = event.currentTarget;
    if (clickedEl !== selectedAnnoteEl) {
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}
// Walk up from `el` until a parent with a non-empty data-cites attribute is
// found. Returns { el, cites } where `el` is the child of that parent on the
// path walked and `cites` is the attribute split on single spaces, or
// undefined when the top of the tree is reached without a match.
const findCites = (el) => {
  let current = el;
  let parent = current.parentElement;
  while (parent) {
    const citeList = parent.dataset.cites;
    if (citeList) {
      return { el: current, cites: citeList.split(' ') };
    }
    current = parent;
    parent = current.parentElement;
  }
  return undefined;
};
// Citation links: on hover, show the bibliography entries named in the
// enclosing element's data-cites attribute.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  const citationContent = function() {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent');
      citeDiv.classList.add('csl-entry');
      // Copy the entry's markup when a matching "ref-<key>" element exists;
      // otherwise the entry stays empty.
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  };
  tippyHover(citeInfo.el, citationContent);
}
});
</script>
</div> <!-- /content -->
</body></html>