Skip to content

Commit 772918d

Browse files
add google doc parser
Co-authored-by: austinpower1258 <austinpower1258@users.noreply.github.com>
1 parent a23a326 commit 772918d

20 files changed

+1300
-5
lines changed

package.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
"eslint": "^7.22.0",
2525
"eslint-config-prettier": "^8.1.0",
2626
"eslint-plugin-svelte3": "^3.2.0",
27+
"googleapis": "^76.0.0",
28+
"hast": "^1.0.0",
29+
"hast-util-heading": "^2.0.0",
30+
"hast-util-heading-rank": "^2.1.0",
31+
"hast-util-to-text": "^3.0.0",
32+
"hastscript": "^7.0.1",
2733
"performant-array-to-tree": "^1.9.1",
2834
"postcss": "^8.3.0",
2935
"prettier": "~2.2.1",

schema.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ CREATE OR REPLACE RECURSIVE VIEW topics_tree (
4747
parent_id,
4848
item_id,
4949
slug,
50+
doc_url,
5051
path,
5152
slug_arr
5253
) AS
@@ -56,6 +57,7 @@ CREATE OR REPLACE RECURSIVE VIEW topics_tree (
5657
t1.parent_id,
5758
t1.item_id,
5859
t1.slug,
60+
t1.doc_url,
5961
t1.slug as path,
6062
array[t1.slug]::text[] as slug_arr
6163
FROM
@@ -69,6 +71,7 @@ CREATE OR REPLACE RECURSIVE VIEW topics_tree (
6971
t2.parent_id,
7072
t2.item_id,
7173
t2.slug,
74+
t2.doc_url,
7275
t_tree.path || '/' || t2.slug as path,
7376
t_tree.slug_arr || t2.slug as slug_arr
7477
FROM

src/lib/CourseEditor/Lesson.svelte

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@
4444
<div class="rounded-b space-x-4 bg-gray-50 border-t flex items-center justify-start px-6 py-3">
4545
<button
4646
on:click={addLesson}
47-
class="inline-flex justify-center py-2 px-4 border border-transparent shadow-sm text-xs font-medium rounded-md text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
47+
class="inline-flex justify-center py-2 px-3 border border-transparent shadow-sm text-xs font-medium rounded-md text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500"
4848
>Add new lesson
4949
</button>
5050
<button
51-
class="flex px-3 py-2 text-sm justify-center items-center transition border rounded-md text-gray-600 dark svelte-1fb58l0"
51+
class="flex px-3 py-2 text-xs justify-center items-center transition border rounded-md text-gray-600 dark svelte-1fb58l0"
5252
type="submit">Save</button
5353
>
5454
</div>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Loosely typed ambient declaration so that imports of 'hast-util-classnames'
// type-check. Arguments and the return value are intentionally untyped (`any`).
declare module 'hast-util-classnames' {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const classnames: (...node: any[]) => any;
  export = classnames;
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Ambient typing for 'hast-util-heading-rank'.
//
// The `import type` lives INSIDE the `declare module` block on purpose: a
// top-level import would turn this file into a module, which makes the
// `declare module` below a module augmentation, where `export =` is not allowed.
declare module 'hast-util-heading-rank' {
  import type { Element } from 'hast';

  /** Declared to take a hast element and return a number (per the package name, the heading rank). */
  function headingRank(node: Element): number;

  // Export the function declared above (the original exported `isHeading`,
  // which does not exist in this module).
  export = headingRank;
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Ambient typing for 'hast-util-heading'.
//
// The `import type` lives INSIDE the `declare module` block on purpose: a
// top-level import would turn this file into a module, which makes the
// `declare module` below a module augmentation, where `export =` is not allowed.
declare module 'hast-util-heading' {
  import type { Element } from 'hast';

  /** Declared to take a hast element and return a boolean (per the name, whether it is a heading). */
  function isHeading(node: Element): boolean;

  export = isHeading;
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Ambient typing for 'hast-util-to-text'.
//
// The `import type` lives INSIDE the `declare module` block on purpose: a
// top-level import would turn this file into a module, which makes the
// `declare module` below a module augmentation, where `export =` is not allowed.
declare module 'hast-util-to-text' {
  import type { Element } from 'hast';

  /** Declared to take a hast element and return a string (per the package name, its text). */
  function toText(node: Element): string;

  export = toText;
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Untyped ambient declaration for 'remark-rehype': the module's export is
// declared as `any`, so every use of it is unchecked.
declare module 'remark-rehype' {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const remarkRehype: any;
  export = remarkRehype;
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
lisp is cool
2+
3+
<div>
4+
<h1>
5+
jsdlakfjas
6+
</h1>
7+
</div>
8+
9+
s-expression
10+
(div
11+
(h1
12+
(jsdlakfsdja)))
13+
14+
HTML is XML, and XML is a tree
15+
16+
Express ANY code as a tree
17+
18+
### LISP BIG MAN IDEAS
19+
20+
<factorial>
21+
<if>
22+
<zerop>
23+
</if>
24+
</factorial>
25+
26+
"Lisp is worth learning for the profound enlightenment experience you will have when you finally get it; that experience will make you a better programmer for the rest of your days, even if you never actually use Lisp itself a lot."
27+
28+
29+
"Greenspun's Tenth Rule of Programming: any sufficiently complicated C or Fortran program contains an ad hoc informally-specified bug-ridden slow implementation of half of Common Lisp."
30+
31+
- Philip Greenspun
32+
33+
(defun factorial (x)
34+
(if (zerop x)
35+
1
36+
(* x (factorial (- x 1)))))
37+
38+
basically a list
39+
40+
intermediate format
41+
from google doc -> our custom html
42+
- table of contents
43+
- other metadata
44+
45+
46+
Things we need:
47+
- Parse json into sexprs
48+
- Parse sexprs into the DOM
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Multiple H1s ARE semantic: https://www.youtube.com/watch?v=WsgrSxCmMbM
2+
import type { docs_v1 } from 'googleapis/build/src/apis/docs/v1';
3+
import { h } from 'hastscript';
4+
import { resolveTableOfContents } from './utils/resolveTableOfContents';
5+
import type { Element } from 'hast';
6+
import { resolveChildren } from './utils/resolveChildren';
7+
import { resolveHeadingLinks } from './utils/resolveHeadingLinks';
8+
import { resolveListElements } from './utils/resolveListElements';
9+
import { resolveList } from './utils/resolveList';
10+
import type { ResolveListProps } from './utils/resolveList';
11+
12+
import toText from 'hast-util-to-text';
13+
import { Slugger } from '$lib/utils/slugger';
14+
15+
/**
 * Lookup from a paragraph's `namedStyleType` to the HTML tag name used when
 * rendering that paragraph. Both TITLE and HEADING_1 map to `h1`.
 */
const STYLE_TAG_MAP: Map<string, string> = new Map(
  Object.entries({
    NORMAL_TEXT: 'p',
    SUBTITLE: 'p',
    TITLE: 'h1',
    HEADING_1: 'h1',
    HEADING_2: 'h2',
    HEADING_3: 'h3',
    HEADING_4: 'h4',
    HEADING_5: 'h5',
    HEADING_6: 'h6'
  })
);
26+
27+
/** Result returned by `parseGoogleDoc`. */
interface ParsedDocument {
  /** Identifier copied from the source document, if it has one. */
  documentId: string | null | undefined;
  /** Revision identifier copied from the source document, if it has one. */
  revisionId: string | null | undefined;
  /** Title derived from the document's content when available, else the document's own title. */
  title: string | null | undefined;
  /** Root `div` element wrapping all rendered content. */
  documentRoot: Element;
  /** `div` element wrapping the table of contents built from `documentRoot`. */
  tableOfContents: Element;
}
34+
35+
/**
 * Converts a Google Docs API document into a hast element tree plus a table
 * of contents.
 *
 * Only structural elements that carry a `paragraph` are rendered. For bulleted
 * paragraphs, only the first paragraph seen for each `listId` is kept; it is
 * replaced by the result of `resolveList` for that list (presumably the whole
 * list — confirm against `resolveList`/`resolveListElements`).
 *
 * @param document - Document resource as returned by the Google Docs API.
 * @returns The rendered root element, a table-of-contents wrapper, and the
 *   document's identifiers. `title` is the text of the last non-empty TITLE
 *   paragraph when there is one, otherwise the document's own `title`.
 * @throws Error if `document` is falsy.
 */
export function parseGoogleDoc(document: docs_v1.Schema$Document): ParsedDocument {
  if (!document) throw new Error('Document is undefined.');

  const slugger = new Slugger();
  const { body, lists, documentId, revisionId, title, inlineObjects } = document;
  const listsMap = new Map(Object.entries(lists ?? {}));
  const inlineObjectsMap = new Map(Object.entries(inlineObjects ?? {}));
  const resolvedListsIds = new Set<string>();
  const listElementsMap = resolveListElements(body?.content, listsMap);
  let customTitle: string | null = null;

  // Thin adapters that bind the per-document state to the shared helpers.
  const _resolveChildren = (elements?: docs_v1.Schema$ParagraphElement[]): Element[] =>
    resolveChildren(elements, inlineObjectsMap);
  const _resolveHeadingLinks = (element: Element): Element => resolveHeadingLinks(element, slugger);
  const _resolveList = (listElements: ResolveListProps | undefined): Element =>
    resolveList(listElements, _resolveChildren);

  const content: Element[] | undefined = body?.content
    ?.filter((structuralElement) => {
      const { paragraph } = structuralElement;
      if (!paragraph) return false;

      // Non-list paragraphs always pass through.
      const listId = paragraph.bullet?.listId;
      if (!listId) return true;

      // Keep only the first paragraph of each list; later ones are dropped
      // because the first one is expanded via `_resolveList` below.
      if (resolvedListsIds.has(listId)) return false;
      resolvedListsIds.add(listId);
      return true;
    })
    .map((structuralElement) => {
      const { paragraph } = structuralElement;
      // Unreachable after the filter above; kept so the type narrows.
      if (!paragraph) throw new Error('Empty paragraph');

      const paragraphStyleKey = paragraph.paragraphStyle?.namedStyleType;
      const paragraphTag = paragraphStyleKey ? STYLE_TAG_MAP.get(paragraphStyleKey) : 'div';
      // `?.` after `[0]` guards against an empty `elements` array.
      const inlineObjectElement = paragraph.elements?.[0]?.inlineObjectElement;
      const listId = paragraph.bullet?.listId;
      const children = _resolveChildren(paragraph.elements);

      if (listId) {
        return _resolveList(listElementsMap.get(listId));
      }

      if (inlineObjectElement) {
        return h('div', children);
      }

      if (paragraphStyleKey === 'TITLE') {
        const titleElement = h(
          'h1',
          {
            class: 'font-serif-display tracking-tight text-6xl pb-10 heading-title',
            dataType: 'title',
            id: 'heading-title'
          },
          children
        );

        // Take the text of the whole heading element. The previous code passed
        // the children array to `toText` and then indexed the resulting string
        // with `[0]`, which could yield at most a single character.
        const titleText = toText(titleElement).trim();
        if (titleText) customTitle = titleText;

        return titleElement;
      }

      // Unknown named styles are absent from the map and fall back to `p`.
      return h(paragraphTag ?? 'p', children);
    })
    .map(_resolveHeadingLinks);

  const documentRoot = h('div', content);
  const tableOfContents = h('div', [resolveTableOfContents(documentRoot) || '']);

  return {
    documentRoot,
    tableOfContents,
    title: customTitle ?? title,
    documentId,
    revisionId
  };
}

0 commit comments

Comments
 (0)