Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ package-lock.json
*.tgz
.vscode
*.parquet
*.csv
*.jsonl
/coverage/

/lib/
Expand Down
12 changes: 6 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,25 +62,25 @@
"squirreling": "0.7.9"
},
"devDependencies": {
"@storybook/react-vite": "10.2.2",
"@storybook/react-vite": "10.2.3",
"@testing-library/react": "16.3.2",
"@types/node": "25.1.0",
"@types/node": "25.2.0",
"@types/react": "19.2.10",
"@types/react-dom": "19.2.3",
"@vitejs/plugin-react": "5.1.2",
"@vitest/coverage-v8": "4.0.18",
"eslint": "9.39.2",
"eslint-plugin-react": "7.37.5",
"eslint-plugin-react-hooks": "7.0.1",
"eslint-plugin-react-refresh": "0.4.26",
"eslint-plugin-storybook": "10.2.2",
"globals": "17.2.0",
"eslint-plugin-react-refresh": "0.5.0",
"eslint-plugin-storybook": "10.2.3",
"globals": "17.3.0",
"jsdom": "27.4.0",
"nodemon": "3.1.11",
"npm-run-all": "4.1.5",
"react": "19.2.4",
"react-dom": "19.2.4",
"storybook": "10.2.2",
"storybook": "10.2.3",
"typescript": "5.9.3",
"typescript-eslint": "8.54.0",
"vite": "7.3.1",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import HighTable, { DataFrame, sortableDataFrame } from 'hightable'
import HighTable, { DataFrame } from 'hightable'
import 'hightable/src/HighTable.css'
import { asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
import React, { useCallback, useEffect, useState } from 'react'
import { useConfig } from '../../hooks/useConfig.js'
import { appendSearchParams } from '../../lib/routes.js'
import { FileSource } from '../../lib/sources/types.js'
import { parquetDataFrame } from '../../lib/tableProvider.js'
import { tableProvider } from '../../lib/tableProvider.js'
import { cn } from '../../lib/utils.js'
import CellPanel from '../CellPanel/CellPanel.js'
import ContentWrapper, { ContentSize } from '../ContentWrapper/ContentWrapper.js'
import SlidePanel from '../SlidePanel/SlidePanel.js'
import styles from './ParquetView.module.css'
import styles from './TableView.module.css'

interface ViewerProps {
source: FileSource
Expand All @@ -23,35 +22,30 @@ interface Content extends ContentSize {
}

/**
* Parquet file viewer
* Table file viewer for parquet, CSV, and JSONL files
*/
export default function ParquetView({ source, setProgress, setError }: ViewerProps) {
export default function TableView({ source, setProgress, setError }: ViewerProps) {
const [isLoading, setIsLoading] = useState<boolean>(true)
const [content, setContent] = useState<Content>()
const [cell, setCell] = useState<{ row: number, col: number } | undefined>()
const { customClass, routes } = useConfig()

useEffect(() => {
async function loadParquetDataFrame() {
async function loadDataFrame() {
try {
setIsLoading(true)
setProgress(0.33)
const { resolveUrl, requestInit } = source
const asyncBuffer = await asyncBufferFromUrl({ url: resolveUrl, requestInit })
const from = { url: resolveUrl, byteLength: asyncBuffer.byteLength, requestInit }
setProgress(0.66)
const metadata = await parquetMetadataAsync(asyncBuffer)
const dataframe = sortableDataFrame(parquetDataFrame(from, metadata))
const fileSize = asyncBuffer.byteLength
setContent({ dataframe, fileSize })
setProgress(0.5)
const { resolveUrl, fileName, requestInit } = source
const dataframe = await tableProvider({ url: resolveUrl, fileName, requestInit })
setContent({ dataframe })
} catch (error) {
setError(error)
} finally {
setIsLoading(false)
setProgress(1)
}
}
void loadParquetDataFrame()
void loadDataFrame()
}, [setError, setProgress, source])

// Close cell view on escape key
Expand Down
4 changes: 2 additions & 2 deletions src/components/Viewer/Viewer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import AvroView from '../AvroView/AvroView.js'
import ImageView from '../ImageView/ImageView.js'
import JsonView from '../JsonView/JsonView.js'
import MarkdownView from '../MarkdownView/MarkdownView.js'
import TableView from '../ParquetView/ParquetView.js'
import TableView from '../TableView/TableView.js'
import TextView from '../TextView/TextView.js'

interface ViewerProps {
Expand All @@ -21,7 +21,7 @@ export default function Viewer({ source, setError, setProgress }: ViewerProps) {
const { fileName } = source
if (fileName.endsWith('.md')) {
return <MarkdownView source={source} setError={setError} />
} else if (fileName.endsWith('.parquet')) {
} else if (fileName.endsWith('.parquet') || fileName.endsWith('.csv') || fileName.endsWith('.jsonl')) {
return <TableView source={source} setError={setError} setProgress={setProgress} />
} else if (fileName.endsWith('.json')) {
return <JsonView source={source} setError={setError} />
Expand Down
4 changes: 2 additions & 2 deletions src/components/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import Layout from './Layout/Layout.js'
import Markdown from './Markdown/Markdown.js'
import MarkdownView from './MarkdownView/MarkdownView.js'
import Page from './Page/Page.js'
import ParquetView from './ParquetView/ParquetView.js'
import TableView from './TableView/TableView.js'
import ProgressBar from './ProgressBar/ProgressBar.js'
import SlidePanel from './SlidePanel/SlidePanel.js'
import Spinner from './Spinner/Spinner.js'
Expand All @@ -40,7 +40,7 @@ export {
Markdown,
MarkdownView,
Page,
ParquetView,
TableView,
ProgressBar,
SlidePanel,
Spinner,
Expand Down
60 changes: 60 additions & 0 deletions src/lib/csv.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm all for it :-) Right now I just wanted a quick fix for csv viewing, but I've hit issues recently with csvs that might benefit from cosovo. The streaming is cool!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice :) Feel free to first try if cosovo can parse them correctly

* Parse CSV text into nested array of rows and columns.
*/
export function parseCsv(text: string): string[][] {
  // Parses CSV text into rows of string fields.
  //
  // - Fields are separated by `,`; rows end with `\n` or `\r\n`.
  // - A field may be wrapped in double quotes, in which case it can contain
  //   commas, line breaks, and `""` (an escaped double quote).
  // - A `\r` that is NOT immediately followed by an unquoted `\n` is kept as
  //   ordinary field data.
  // - An unterminated quoted field is reported with console.error and its
  //   content is still returned.
  // - A trailing empty line does not produce an extra row.
  const rows: string[][] = []
  let row: string[] = []
  let field = ''
  let inQuotes = false
  // Set right after a `"` is seen inside a quoted field: the next character
  // decides whether it was an escaped quote (`""`) or the closing quote.
  let previousCharWasQuote = false
  // Set right after an unquoted `\r`: the next character decides whether it
  // was half of a CRLF row terminator (dropped) or plain data (kept).
  let pendingCarriageReturn = false

  for (const char of text) {

    if (inQuotes && char === '"' && !previousCharWasQuote) {
      // first quote, wait to see if it's escaped or end of field
      previousCharWasQuote = true
    } else if (inQuotes && char === '"' && previousCharWasQuote) {
      // escaped quote: `""` inside a quoted field is a literal `"`
      field += char
      previousCharWasQuote = false
    } else if (inQuotes && !previousCharWasQuote) {
      // append quoted character to field
      field += char
    } else {
      // not in quotes (or the previous quote just closed the field)
      inQuotes = false
      previousCharWasQuote = false
      if (pendingCarriageReturn) {
        pendingCarriageReturn = false
        // a lone carriage return is data; only CRLF is a row terminator
        if (char !== '\n') field += '\r'
      }
      switch (char) {
        case ',':
          // emit column
          row.push(field)
          field = ''
          break
        case '\r':
          // defer: dropped if a line feed follows, appended otherwise
          pendingCarriageReturn = true
          break
        case '\n':
          // emit row
          row.push(field)
          rows.push(row)
          row = []
          field = ''
          break
        case '"':
          inQuotes = true
          break
        default:
          field += char
      }
    }
  }

  if (inQuotes && !previousCharWasQuote) {
    console.error('csv unterminated quote')
  }

  // a carriage return at the very end of the input was never followed by a
  // line feed, so it belongs to the last field
  if (pendingCarriageReturn) {
    field += '\r'
  }

  // handle last field and row, but skip empty last line
  if (field || row.length) {
    row.push(field)
    rows.push(row)
  }

  return rows
}
3 changes: 2 additions & 1 deletion src/lib/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
export { appendSearchParams, replaceSearchParams } from './routes.js'
export * from './sources/index.js'
export { parquetDataFrame } from './tableProvider.js'
export { parseCsv } from './csv.js'
export { csvDataFrame, jsonLinesDataFrame, parquetDataFrame, tableProvider } from './tableProvider.js'
export { asyncBufferFrom, cn, contentTypes, formatFileSize, getFileDate, getFileDateShort, imageTypes, parseFileSize } from './utils.js'
export { parquetQueryWorker, parquetReadObjectsWorker, parquetReadWorker } from './workers/parquetWorkerClient.js'
export type { AsyncBufferFrom } from './workers/types.js'
79 changes: 77 additions & 2 deletions src/lib/tableProvider.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,38 @@
import { DataFrame, DataFrameEvents, ResolvedValue, checkSignal, createEventTarget, validateFetchParams, validateGetCellParams, validateGetRowNumberParams } from 'hightable'
import { DataFrame, DataFrameEvents, ResolvedValue, arrayDataFrame, checkSignal, createEventTarget, sortableDataFrame, validateFetchParams, validateGetCellParams, validateGetRowNumberParams } from 'hightable'
import type { ColumnData } from 'hyparquet'
import { FileMetaData, ParquetReadOptions, parquetSchema } from 'hyparquet'
import { FileMetaData, ParquetReadOptions, asyncBufferFromUrl, parquetMetadataAsync, parquetSchema } from 'hyparquet'
import { parseCsv } from './csv.js'
import { parquetReadWorker } from './workers/parquetWorkerClient.js'
import type { AsyncBufferFrom } from './workers/types.d.ts'

/** Options accepted by `tableProvider`. */
interface TableProviderOptions {
/** URL of the file; passed to `asyncBufferFromUrl` and on to the format-specific loader. */
url: string
/** File name whose extension, compared case-insensitively, selects the loader. */
fileName: string
/** Optional fetch options forwarded together with `url`. */
requestInit?: RequestInit
}

/**
* Create a dataframe from a file URL, automatically detecting the file type.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe

Suggested change
* Create a dataframe from a file URL, automatically detecting the file type.
* Create a sortable dataframe from a file URL, automatically detecting the file type.

Also (nit), should we factor the call to sortableDataFrame to make it clear that all of them are sortable?

* Supports parquet, CSV, and JSONL files.
*/
export async function tableProvider({ url, fileName, requestInit }: TableProviderOptions): Promise<DataFrame> {
  // Open the remote file first: its byte length is part of the `from`
  // descriptor handed to whichever loader is selected below.
  const remoteBuffer = await asyncBufferFromUrl({ url, requestInit })
  const { byteLength } = remoteBuffer
  const from = { url, byteLength, requestInit }

  // Pick the loader from the file extension, ignoring case.
  const lowerCaseName = fileName.toLowerCase()
  const hasExtension = (extension: string): boolean => lowerCaseName.endsWith(extension)

  if (hasExtension('.csv')) return csvDataFrame(from)
  if (hasExtension('.jsonl')) return jsonLinesDataFrame(from)

  // Any other extension is read as parquet.
  const parquetMetadata = await parquetMetadataAsync(remoteBuffer)
  const unsorted = parquetDataFrame(from, parquetMetadata)
  return sortableDataFrame(unsorted)
}

type GroupStatus = {
kind: 'unfetched'
} | {
Expand Down Expand Up @@ -130,3 +159,49 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData,

return unsortableDataFrame
}

/**
 * Convert a CSV file into a sortable dataframe.
 *
 * Reads the entire file into memory, decodes it with `TextDecoder` (UTF-8 by
 * default), parses it with `parseCsv`, and treats the first row as the header.
 * Every following row becomes an object keyed by the header names:
 * - cells missing from a short row are `undefined`,
 * - cells beyond the header length are dropped,
 * - when a header name is repeated, the last matching column wins.
 *
 * @param from - source of the file: either a local `file` or a `url` with an
 * optional `requestInit`
 * @returns a sortable dataframe with one row per CSV record after the header
 * @throws {Error} if the file is fetched by URL and the response status is not OK
 */
export async function csvDataFrame(from: AsyncBufferFrom): Promise<DataFrame> {
  let buffer: ArrayBuffer
  if ('file' in from) {
    buffer = await from.file.arrayBuffer()
  } else {
    const response = await fetch(from.url, from.requestInit)
    // Without this check the body of an error page would be parsed as CSV.
    if (!response.ok) {
      throw new Error(`Failed to fetch ${from.url}: ${response.status} ${response.statusText}`)
    }
    buffer = await response.arrayBuffer()
  }

  const text = new TextDecoder().decode(buffer)
  const lines = parseCsv(text)
  // An empty file has no header row and therefore no columns.
  const header = lines[0] ?? []
  const rows = lines.slice(1).map(row => {
    return Object.fromEntries(header.map((key, i) => [key, row[i]]))
  })
  return sortableDataFrame(arrayDataFrame(rows))
}

/**
 * Convert a JSONL file into a sortable dataframe.
 *
 * Reads the entire file into memory, decodes it with `TextDecoder` (UTF-8 by
 * default), skips blank lines, and parses every remaining line with
 * `JSON.parse`. Each parsed value is used as one row.
 *
 * @param from - source of the file: either a local `file` or a `url` with an
 * optional `requestInit`
 * @returns a sortable dataframe with one row per non-blank line
 * @throws {Error} if the file is fetched by URL and the response status is not OK
 * @throws {SyntaxError} if a non-blank line is not valid JSON
 */
export async function jsonLinesDataFrame(from: AsyncBufferFrom): Promise<DataFrame> {
  let buffer: ArrayBuffer
  if ('file' in from) {
    buffer = await from.file.arrayBuffer()
  } else {
    const response = await fetch(from.url, from.requestInit)
    // Without this check the body of an error page would be parsed as JSON.
    if (!response.ok) {
      throw new Error(`Failed to fetch ${from.url}: ${response.status} ${response.statusText}`)
    }
    buffer = await response.arrayBuffer()
  }

  const text = new TextDecoder().decode(buffer).trimEnd()
  // Blank lines are removed here, so every remaining line holds content.
  const lines = text.split('\n').filter(line => line.trim())
  // NOTE(review): lines are assumed to hold JSON objects; the shape is not validated.
  const rows: Record<string, unknown>[] = lines.map(line => {
    return JSON.parse(line) as Record<string, unknown>
  })
  return sortableDataFrame(arrayDataFrame(rows))
}
63 changes: 63 additions & 0 deletions test/lib/csv.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import { describe, expect, it, vi } from 'vitest'
import { parseCsv } from '../../src/index.js'

// Unit tests for parseCsv: plain rows, trailing newline, quoting, escaped
// quotes, embedded newlines, and the unterminated-quote error path.
describe('parseCsv', () => {
  it('parses simple CSV', () => {
    const csv = 'Name,Age,Occupation\nAlice,30,Engineer\nBob,25,Designer'
    const expected = [
      ['Name', 'Age', 'Occupation'],
      ['Alice', '30', 'Engineer'],
      ['Bob', '25', 'Designer'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('ignores empty last line', () => {
    const csv = 'Name,Age,Occupation\nAlice,30,Engineer\n'
    const expected = [
      ['Name', 'Age', 'Occupation'],
      ['Alice', '30', 'Engineer'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles quoted fields', () => {
    const csv = 'Name,Age,Occupation\n"Alice, PhD",30,Engineer\nBob,25,"Designer, Senior"'
    const expected = [
      ['Name', 'Age', 'Occupation'],
      ['Alice, PhD', '30', 'Engineer'],
      ['Bob', '25', 'Designer, Senior'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles escaped quotes', () => {
    const csv = 'Name,Quote\nAlice,"She said, ""Hello world"""\nBob,"This is ""an example"" of quotes"'
    const expected = [
      ['Name', 'Quote'],
      ['Alice', 'She said, "Hello world"'],
      ['Bob', 'This is "an example" of quotes'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles newlines within quoted fields', () => {
    const csv = 'Name,Address\nAlice,"123 Main St.\nAnytown, USA"'
    const expected = [
      ['Name', 'Address'],
      ['Alice', '123 Main St.\nAnytown, USA'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles unterminated quotes', () => {
    const csv = 'Name,Quote\nAlice,"This is an unterminated quote\n'
    const expected = [
      ['Name', 'Quote'],
      ['Alice', 'This is an unterminated quote\n'],
    ]
    // Silence the expected error log, and always restore console.error so the
    // spy cannot leak into other tests.
    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => undefined)
    try {
      expect(parseCsv(csv)).toEqual(expected)
      expect(errorSpy).toHaveBeenCalledWith('csv unterminated quote')
    } finally {
      errorSpy.mockRestore()
    }
  })
})