Skip to content

Commit

Permalink
fixup! discojs-core/models: add gpt
Browse files Browse the repository at this point in the history
  • Loading branch information
tharvik committed Mar 1, 2024
1 parent f1d0036 commit 8d43eb9
Show file tree
Hide file tree
Showing 5 changed files with 846 additions and 840 deletions.
118 changes: 59 additions & 59 deletions discojs/discojs-core/src/models/gpt/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,71 +7,71 @@ type ModelType =
| 'gpt-micro'
| 'gpt-nano'

/**
 * Size parameters of a GPT model.
 * Every field is optional; wrap in `Required<ModelSize>` when all of them
 * must be present.
 */
interface ModelSize {
  nLayer?: number
  nHead?: number
  nEmbd?: number
}

/**
 * Configuration of a GPT model.
 *
 * `lr`, `batchSize`, `blockSize`, `vocabSize` and `modelType` are mandatory;
 * every other field is optional.
 */
export interface GPTConfig {
  lr: number
  batchSize: number
  blockSize: number
  vocabSize: number
  evaluate?: boolean
  maxEvalBatches?: number
  evaluateEvery?: number
  epochs?: number
  maxIter?: number
  weightDecay?: number
  verbose?: boolean
  bias?: boolean
  debug?: boolean
  dropout?: number
  residDrop?: number
  embdDrop?: number
  tokEmb?: boolean
  lmHead?: boolean
  modelType: ModelType
}

/**
 * Default value for every field of `GPTConfig`.
 * Typed as `Required<GPTConfig>` so that the optional fields are all given
 * a value here.
 */
export const DEFAULT_CONFIG: Required<GPTConfig> = {
  lr: 0.001,
  weightDecay: 0,
  batchSize: 2,
  epochs: 9999,
  maxIter: 10_000,
  verbose: false,
  modelType: 'gpt-nano',
  evaluate: true,
  maxEvalBatches: 12,
  evaluateEvery: 100,
  blockSize: 128,
  vocabSize: 50258,
  bias: true,
  debug: false,
  dropout: 0.2,
  residDrop: 0.2,
  embdDrop: 0.2,
  tokEmb: true,
  lmHead: true
}

/**
 * Gives the `nLayer`, `nHead` and `nEmbd` values associated with a model type.
 *
 * @param modelType which model to look up
 * @returns the fully populated size of that model
 * @throws Error if `modelType` is not one of the known model types
 */
export const getModelSizes = (modelType: ModelType): Required<ModelSize> => {
  switch (modelType) {
    case 'gpt2':
      return { nLayer: 12, nHead: 12, nEmbd: 768 }
    case 'gpt2-medium':
      return { nLayer: 24, nHead: 16, nEmbd: 1024 }
    case 'gpt2-large':
      return { nLayer: 36, nHead: 20, nEmbd: 1280 }
    case 'gpt2-xl':
      return { nLayer: 48, nHead: 25, nEmbd: 1600 }
    case 'gpt-mini':
      return { nLayer: 6, nHead: 6, nEmbd: 192 }
    case 'gpt-micro':
      return { nLayer: 4, nHead: 4, nEmbd: 128 }
    case 'gpt-nano':
      return { nLayer: 3, nHead: 3, nEmbd: 48 }
    default:
      // only reachable when the type system was bypassed (e.g. untyped caller);
      // fail loudly rather than returning undefined
      throw new Error(`unknown model type: ${String(modelType)}`)
  }
}
66 changes: 33 additions & 33 deletions discojs/discojs-core/src/models/gpt/evaluate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,47 @@ import type { data } from '../..'

import type { GPTConfig } from './config'

/**
 * Runs the model over at most `config.maxEvalBatches` batches of `dataset`
 * and reports the mean loss, the perplexity derived from it and the accuracy.
 *
 * @param model object exposing `apply(xs)`, whose result is used as the logits
 * @param dataset dataset whose iterator yields `{ xs, ys }` batches
 * @param config GPT configuration; only `maxEvalBatches` is read here
 * @returns `val/loss` (softmax cross-entropy averaged over the evaluated
 *   batches), `val/perplexity` (`exp` of that loss) and `val/acc` (sum of the
 *   categorical accuracy over its number of entries); all three are `NaN`
 *   when the dataset yields no batch
 */
export default async function evaluate (
  model: any,
  dataset: data.Dataset,
  config: Required<GPTConfig>
): Promise<Record<'val/loss' | 'val/perplexity' | 'val/acc', number>> {
  console.log('Evaluating..')

  const iter = await dataset.iterator()

  let totalLoss = 0
  let correct = 0
  let seen = 0

  let iteration = 0
  while (iteration < config.maxEvalBatches) {
    const next = await iter.next()
    // the iterator result is always an object: exhaustion is signalled
    // through `done`, not through a falsy result
    if (next.done === true) break
    const { xs, ys } = next.value as { xs: tf.Tensor, ys: tf.Tensor }
    const logits = model.apply(xs) as tf.Tensor

    // Loss
    const loss = tf.losses.softmaxCrossEntropy(ys, logits)
    totalLoss += (await loss.array()) as number

    // Accuracy
    const accTensor = tf.metrics.categoricalAccuracy(ys, logits)
    const accSum = accTensor.sum()
    correct += (await accSum.array()) as number
    // `size` is the total number of entries, i.e. the product of the shape
    seen += accTensor.size

    tf.dispose([accTensor, accSum, loss, logits, xs, ys])

    iteration++
  }

  const loss = totalLoss / iteration
  const perplexity = Math.exp(loss)

  return {
    'val/loss': loss,
    'val/perplexity': perplexity,
    'val/acc': correct / seen
  }
}
Loading

0 comments on commit 8d43eb9

Please sign in to comment.