From 3a03367ee422d1686d3f26405fc39722206746ac Mon Sep 17 00:00:00 2001 From: vibemarketerpromax Date: Tue, 3 Mar 2026 14:06:27 +0530 Subject: [PATCH] Harden Notion author image resolution and add avatar audit gate --- .github/workflows/ci.yml | 5 + lib/notion-blog.ts | 151 +++++++++++++++++++++--- lib/notion-image-cache.ts | 43 ++++++- package.json | 1 + scripts/audit-author-avatars.mjs | 192 +++++++++++++++++++++++++++++++ 5 files changed, 374 insertions(+), 18 deletions(-) create mode 100644 scripts/audit-author-avatars.mjs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1b9efa9e..1b8d24f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,5 +38,10 @@ jobs: env: NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }} + - name: Audit blog author avatars + run: npm run authors:audit + env: + NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }} + - name: Check bundle size run: npm run bundle:check diff --git a/lib/notion-blog.ts b/lib/notion-blog.ts index d2000f39..a019a7b3 100644 --- a/lib/notion-blog.ts +++ b/lib/notion-blog.ts @@ -71,6 +71,7 @@ export interface BlogContent { type NotionProperties = PageObjectResponse["properties"]; type NotionProperty = NotionProperties[string]; +type NotionPropertyEntry = { key: string; property: NotionProperty }; // ============================================================================= // Helper Functions for Property Extraction @@ -164,6 +165,118 @@ function getPerson(property: NotionProperty | undefined): string | null { return null; } +function normalizePropertyKey(key: string): string { + return key.toLowerCase().replace(/[^a-z0-9]/g, ""); +} + +function findPropertyByExactNames( + properties: NotionProperties, + names: string[], + allowedTypes?: NotionProperty["type"][] +): NotionPropertyEntry | null { + const normalized = new Set(names.map(normalizePropertyKey)); + + for (const [key, property] of Object.entries(properties)) { + if (normalized.has(normalizePropertyKey(key))) { + if (!allowedTypes || allowedTypes.includes(property.type)) { + return { key, property }; + } + } + } + + return null; +} + +function findPropertyByPattern( + properties: NotionProperties, + pattern: RegExp, + allowedTypes?: NotionProperty["type"][] +): NotionPropertyEntry | null { + for (const [key, property] of Object.entries(properties)) { + if (pattern.test(key)) { + if (!allowedTypes || allowedTypes.includes(property.type)) { + return { key, property }; + } + } + } + + return null; +} + +function getAvatarUrlFromPeople(property: NotionProperty | undefined): string | null { + if (!property || property.type !== "people" || property.people.length === 0) { + return null; + } + + const person = property.people[0] as { avatar_url?: string | null }; + if (person.avatar_url && person.avatar_url.trim().length > 0) { + return person.avatar_url; + } + + return null; +} + +function getRichTextUrl(property: NotionProperty | undefined): string | null { + if (!property || property.type !== "rich_text" || property.rich_text.length === 0) { + return null; + } + + for (const segment of property.rich_text) { + if (segment.href && /^https?:\/\//i.test(segment.href)) { + return segment.href; + } + if (/^https?:\/\//i.test(segment.plain_text)) { + return segment.plain_text; + } + } + + return null; +} + +function getAuthorPeopleProperty(properties: NotionProperties): NotionPropertyEntry | null { + return ( + findPropertyByExactNames(properties, ["Author"], ["people"]) || + findPropertyByPattern(properties, /author/i, ["people"]) + ); +} + +function getAuthorImageFromProperties( + properties: NotionProperties +): { url: string; source: string } | null { + const exactMatch = findPropertyByExactNames( + properties, + ["Author image", "Author Image", "Author Photo", "Author Avatar", "Profile Photo", "Profile Image"], + ["files", "url", "rich_text"] + ); + + const fuzzyMatch = + exactMatch || + findPropertyByPattern( + properties, + /(author|profile).*(image|photo|avatar)|(image|photo|avatar).*(author|profile)/i, + ["files", "url", "rich_text"] + ); + + if (!fuzzyMatch) { + return null; + } + + const { key, property } = fuzzyMatch; + const url = + getFiles(property) || + getUrl(property) || + getRichTextUrl(property); + + if (!url) { + return null; + } + + return { + url, + source: `${key} (${property.type})`, + }; +} + // ============================================================================= // Category Mapping @@ -314,7 +427,9 @@ async function transformNotionPageToBlogPost( getRichText(props["topic"]) || getSelect(props["Category"]) || getSelect(props["category"]); + const authorPeopleProperty = getAuthorPeopleProperty(props); const authorName = + getPerson(authorPeopleProperty?.property) || getPerson(props["Author"]) || getRichText(props["Author"]) || getSelect(props["Author"]); @@ -348,18 +463,14 @@ async function transformNotionPageToBlogPost( const readTime = getNumber(props["Read Time"]) || 5; // Slug property (renamed from URL) - use rich text Slug as primary const customSlug = getRichText(props["Slug"]) || getUrl(props["URL"]); - // Author photo from Notion files property - const authorImageUrl = - getFiles(props["Author image"]) || - getFiles(props["Author Image"]) || - getFiles(props["Author Photo"]); - if (!authorImageUrl && authorName) { - // Log available file-type properties to help diagnose missing author photos - const fileProps = Object.entries(props) - .filter(([, v]) => v.type === "files") - .map(([k, v]) => `${k}: ${(v as { files?: unknown[] }).files?.length ?? 0} file(s)`); - console.warn(`[author-photo] No image found for "${authorName}". File properties: [${fileProps.join(", ")}]`); - } + const explicitAuthorImage = getAuthorImageFromProperties(props); + const peopleAvatarUrl = getAvatarUrlFromPeople(authorPeopleProperty?.property); + const authorImageUrl = explicitAuthorImage?.url || peopleAvatarUrl || null; + const authorImageSource = + explicitAuthorImage?.source || + (peopleAvatarUrl + ? `${authorPeopleProperty?.key || "Author"} (people.avatar_url)` + : null); // Cover image from Notion files property (primary) with fallbacks const featuredImage = @@ -382,10 +493,24 @@ async function transformNotionPageToBlogPost( slug = generateSlug(title); } + if (!authorImageUrl && authorName) { + // Log available candidate properties so missing photos can be fixed quickly in Notion. + const imageLikeProps = Object.entries(props) + .filter(([k]) => /(author|profile|image|photo|avatar)/i.test(k)) + .map(([k, v]) => `${k} (${v.type})`); + console.warn( + `[author-photo] Missing author photo for slug="${slug}", author="${authorName}". Candidate properties: [${imageLikeProps.join(", ")}]` + ); + } + // Map category and author const category = mapCategory(categoryName); const authorId = authorName?.toLowerCase().replace(/\s+/g, "-") || "procedure-team"; - const cachedAuthorPhoto = await cacheAuthorPhoto(authorImageUrl, authorId); + const cachedAuthorPhoto = await cacheAuthorPhoto(authorImageUrl, authorId, { + authorName: authorName || undefined, + slug, + source: authorImageSource || undefined, + }); const author = mapAuthor(authorName, authorBio, authorTitle, cachedAuthorPhoto); // Cache cover image to public folder (downloads from Notion and saves locally) diff --git a/lib/notion-image-cache.ts b/lib/notion-image-cache.ts index bb647720..eeda06a0 100644 --- a/lib/notion-image-cache.ts +++ b/lib/notion-image-cache.ts @@ -109,11 +109,17 @@ function isNotionUrl(url: string): boolean { } // Download and cache a single image, converting non-browser formats to JPEG -async function downloadImage(url: string, localPath: string): Promise { +async function downloadImage( + url: string, + localPath: string, + context?: string +): Promise { try { const response = await fetch(url); if (!response.ok) { - console.warn(`Failed to download image: ${url} (${response.status})`); + console.warn( + `[image-cache] Failed to download image${context ? ` (${context})` : ""}: ${url} (${response.status})` + ); return false; } @@ -159,7 +165,10 @@ async function downloadImage(url: string, localPath: string): Promise { writeFileSync(localPath, converted); return true; } catch (error) { - console.warn(`Error downloading image ${url}:`, error); + console.warn( + `[image-cache] Error downloading image${context ? ` (${context})` : ""}: ${url}`, + error + ); return false; } } @@ -321,7 +330,12 @@ export async function cacheBlogContentImage( */ export async function cacheAuthorPhoto( url: string | null, - authorId: string + authorId: string, + context?: { + authorName?: string; + slug?: string; + source?: string; + } ): Promise { if (!url) return null; @@ -346,7 +360,26 @@ export async function cacheAuthorPhoto( return `/content/cache/authors/${legacyFilename}`; } - const success = await downloadImage(url, localPath); + const contextLabel = [ + context?.slug ? `slug=${context.slug}` : null, + context?.authorName ? `author=${context.authorName}` : null, + context?.source ? `source=${context.source}` : null, + ] + .filter(Boolean) + .join(", "); + + const success = await downloadImage( + url, + localPath, + contextLabel || `authorId=${authorId}` + ); + + if (!success) { + console.warn( + `[author-photo] Failed to cache author image (${contextLabel || `authorId=${authorId}`}) from URL: ${url}` + ); + } + return success ? publicPath : null; } diff --git a/package.json b/package.json index bdc56d0a..6bda0795 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "typecheck": "tsc --noEmit", "test:e2e": "playwright test", "seo:check": "node scripts/seo-check.mjs", + "authors:audit": "node scripts/audit-author-avatars.mjs", "lighthouse": "lhci autorun", "bundle:check": "node scripts/bundle-budget.mjs", "bundle:analyze": "ANALYZE=true next build", diff --git a/scripts/audit-author-avatars.mjs b/scripts/audit-author-avatars.mjs new file mode 100644 index 00000000..2cfe70c2 --- /dev/null +++ b/scripts/audit-author-avatars.mjs @@ -0,0 +1,192 @@ +import { Client } from "@notionhq/client"; + +const BLOG_DATA_SOURCE_ID = "dda27538-3e23-43b5-9668-a0103ebf2e59"; + +function normalizePropertyKey(key) { + return key.toLowerCase().replace(/[^a-z0-9]/g, ""); +} + +function getTitle(properties) { + const titleProp = properties.Name || properties.Title; + if (!titleProp || titleProp.type !== "title" || titleProp.title.length === 0) { + return null; + } + return titleProp.title.map((t) => t.plain_text).join(""); +} + +function getStatus(properties) { + const status = properties.Status; + if (!status || status.type !== "select" || !status.select) return null; + return status.select.name; +} + +function getAuthorName(properties) { + const author = properties.Author; + if (!author) return null; + if (author.type === "people" && author.people.length > 0) { + return author.people[0].name || null; + } + if (author.type === "rich_text" && author.rich_text.length > 0) { + return author.rich_text.map((t) => t.plain_text).join(""); + } + if (author.type === "select" && author.select) { + return author.select.name; + } + return null; +} + +function getPeopleAvatarUrl(properties) { + const author = properties.Author; + if (!author || author.type !== "people" || author.people.length === 0) { + return null; + } + return author.people[0].avatar_url || null; +} + +function getImageUrlFromProperty(property) { + if (!property) return null; + if (property.type === "files" && property.files.length > 0) { + const file = property.files[0]; + return file.type === "external" ? file.external.url : file.file.url; + } + if (property.type === "url") { + return property.url || null; + } + if (property.type === "rich_text" && property.rich_text.length > 0) { + for (const segment of property.rich_text) { + if (segment.href && /^https?:\/\//i.test(segment.href)) { + return segment.href; + } + if (/^https?:\/\//i.test(segment.plain_text)) { + return segment.plain_text; + } + } + } + return null; +} + +function getAuthorImage(properties) { + const exactNames = new Set( + ["Author image", "Author Image", "Author Photo", "Author Avatar", "Profile Photo", "Profile Image"].map( + normalizePropertyKey + ) + ); + const allowedTypes = new Set(["files", "url", "rich_text"]); + + let candidate = null; + for (const [key, property] of Object.entries(properties)) { + if (!allowedTypes.has(property.type)) continue; + const normalized = normalizePropertyKey(key); + if (exactNames.has(normalized)) { + candidate = { key, property }; + break; + } + } + + if (!candidate) { + for (const [key, property] of Object.entries(properties)) { + if (!allowedTypes.has(property.type)) continue; + if (/(author|profile).*(image|photo|avatar)|(image|photo|avatar).*(author|profile)/i.test(key)) { + candidate = { key, property }; + break; + } + } + } + + if (!candidate) return null; + const url = getImageUrlFromProperty(candidate.property); + if (!url) return null; + return { url, source: `${candidate.key} (${candidate.property.type})` }; +} + +async function getPublishedBlogPages(client) { + const pages = []; + let cursor = undefined; + + do { + const response = await client.dataSources.query({ + data_source_id: BLOG_DATA_SOURCE_ID, + page_size: 100, + start_cursor: cursor, + }); + + pages.push( + ...response.results.filter( + (result) => result.object === "page" && result.properties + ) + ); + cursor = response.next_cursor || undefined; + } while (cursor); + + return pages; +} + +async function main() { + const notionToken = process.env.NOTION_TOKEN; + if (!notionToken) { + console.warn( + "[author-audit] NOTION_TOKEN is not configured. Skipping Notion author avatar audit." + ); + process.exit(0); + } + + const notion = new Client({ + auth: notionToken, + notionVersion: "2025-09-03", + }); + + const pages = await getPublishedBlogPages(notion); + const published = pages.filter((page) => { + const status = getStatus(page.properties); + return status === "Published" || status === "Live"; + }); + + if (published.length === 0) { + console.error( + "[author-audit] No published blog pages returned from Notion. Check data source/status values." + ); + process.exit(1); + } + + const missing = []; + for (const page of published) { + const properties = page.properties; + const title = getTitle(properties) || "(untitled)"; + const author = getAuthorName(properties) || "(unknown author)"; + const explicitImage = getAuthorImage(properties); + const peopleAvatar = getPeopleAvatarUrl(properties); + const resolved = explicitImage?.url || peopleAvatar; + + if (!resolved) { + const candidateProps = Object.entries(properties) + .filter(([key]) => /(author|profile|image|photo|avatar)/i.test(key)) + .map(([key, prop]) => `${key} (${prop.type})`); + missing.push({ + title, + author, + candidates: candidateProps.join(", "), + }); + } + } + + if (missing.length > 0) { + console.error( + `[author-audit] Found ${missing.length} published post(s) with missing author images:` + ); + for (const row of missing) { + console.error( + `- title="${row.title}", author="${row.author}", candidateProps=[${row.candidates}]` + ); + } + process.exit(1); + } + + console.log( + `[author-audit] OK: ${published.length}/${published.length} published posts have author images (explicit field or Notion people avatar).` + ); +} + +main().catch((error) => { + console.error("[author-audit] Unexpected error:", error); + process.exit(1); +});