diff --git a/packages/xgplayer-mp4-loader/src/config.js b/packages/xgplayer-mp4-loader/src/config.js index 3dbf36a27..5ba2321d0 100644 --- a/packages/xgplayer-mp4-loader/src/config.js +++ b/packages/xgplayer-mp4-loader/src/config.js @@ -1,10 +1,14 @@ export function getConfig (cfg) { + if (typeof cfg.fixEditListOffset !== 'boolean') { + delete cfg.fixEditListOffset + } return { vid: '', moovEnd: 80000, segmentDuration: 2, maxDownloadInfoSize: 30, responseType: 'arraybuffer', + fixEditListOffset: true, cache: null, // ...xgplayer-streaming-shared/src/net/config ...cfg diff --git a/packages/xgplayer-mp4-loader/src/loader.js b/packages/xgplayer-mp4-loader/src/loader.js index 14358b217..139cf42c6 100644 --- a/packages/xgplayer-mp4-loader/src/loader.js +++ b/packages/xgplayer-mp4-loader/src/loader.js @@ -3,7 +3,7 @@ import { MP4Parser } from 'xgplayer-transmuxer' import { getConfig } from './config' import { MediaError } from './error' import { Cache } from './cache' -import { isNumber, moovToMeta, moovToSegments } from './utils' +import { isNumber, moovToMeta, moovToSegments, isSegmentsOk } from './utils' import EventEmitter from 'eventemitter3' export class MP4Loader extends EventEmitter { @@ -76,10 +76,10 @@ export class MP4Loader extends EventEmitter { return this.meta } - async loadMetaProcess (cache, [moovStart, moovEnd], onProgress, config) { + async loadMetaProcess (cache, [moovStart, moovEnd], onProgress, config = {}) { this._error = false this.logger.debug('[loadMetaProcess start], range,', [moovStart, moovEnd]) - const OnProgressHandle = async (data, state, options) => { + const OnProgressHandle = async (data, state, options, response) => { if (this.meta && options?.range && options.range.length > 0 && options.range[1] >= moovEnd) { state = true this.logger.debug('[loadMetaProcess],data done,setstate true,[', moovStart, moovEnd, ']') @@ -89,7 +89,7 @@ export class MP4Loader extends EventEmitter { this.logger.debug('[loadMetaProcess],data not done,setstate 
false,[', moovStart, moovEnd, ']') } this.logger.debug('[loadMetaProcess],task,[', moovStart, moovEnd, '], range,', options.range, ',dataLen,', (data ? data.byteLength : undefined), ', state,', state, ',err,',this._error) - !this._error && data && data.byteLength > 0 && onProgress(data, state, options) + !this._error && data && data.byteLength > 0 && onProgress(data, state, options, null, response) if (this.meta.moov || this._error) return if (data && data.byteLength > 0) { this.buffer = concatUint8Array(this.buffer, data) @@ -99,7 +99,7 @@ export class MP4Loader extends EventEmitter { if (state) { if (!mdat) { this._error = true - onProgress(null, state, options, {err:'cannot find moov or mdat box'}) + onProgress(null, state, options, new MediaError('cannot find moov or mdat box'), response) return // throw new MediaError('cannot find moov or mdat box') } else { @@ -119,15 +119,15 @@ export class MP4Loader extends EventEmitter { const parsedMoov = MP4Parser.moov(moov) if (!parsedMoov) { this._error = true - onProgress(null, state, options, {err:'cannot parse moov box'}) + onProgress(null, state, options, new MediaError('cannot parse moov box'), response) return // throw new MediaError('cannot parse moov box', moov.data) } - const segments = moovToSegments(parsedMoov, this._config.segmentDuration) - if (!segments) { + const segments = moovToSegments(parsedMoov, this._config) + if (!isSegmentsOk(segments)) { this._error = true - onProgress(null, state, options, {err:'cannot parse segments'}) + onProgress(null, state, options, new MediaError('cannot parse segments'), response) return // throw new MediaError('cannot parse segments', moov.data) } @@ -143,14 +143,14 @@ export class MP4Loader extends EventEmitter { videoSegments, audioSegments } - }) + }, null, response) } } } await this.loadData([moovStart, moovEnd || this._config.moovEnd], cache, { onProgress: OnProgressHandle, ...config}) } - async loadMeta (cache, moovEnd, config) { + async loadMeta (cache, moovEnd, 
config = {}) { const responses = [] this.logger.debug('[loadMeta start]') let res = await this.loadData([0, moovEnd || this._config.moovEnd], cache, config) @@ -181,9 +181,8 @@ export class MP4Loader extends EventEmitter { if (!parsedMoov) { throw new MediaError('cannot parse moov box', moov.data) } - - const segments = moovToSegments(parsedMoov, this._config.segmentDuration) - if (!segments) { + const segments = moovToSegments(parsedMoov, this._config) + if (!isSegmentsOk(segments)) { throw new MediaError('cannot parse segments', moov.data) } @@ -202,7 +201,7 @@ export class MP4Loader extends EventEmitter { loadCacheMeta (meta, segmentIndex){ const { moov } = meta - const segments = moovToSegments(moov, this._config.segmentDuration) + const segments = moovToSegments(moov, this._config) const { videoSegments, audioSegments } = segments this.videoSegments = videoSegments this.audioSegments = audioSegments @@ -315,12 +314,12 @@ export class MP4Loader extends EventEmitter { return res } - async loadData (range, cache, config) { + async loadData (range, cache, config = {}) { const cacheKey = this._getCacheKey(range) const data = await this.cache.get(cacheKey) let res if (!data) { - const url = config && config.url ? config.url : this.url + const url = config?.url ? 
config.url : this.url res = await this._loader.load(url, { range, vid: this.vid, ...config }) } else { res = { data, state: true, options: { fromCache: true, range, vid: this.vid } } diff --git a/packages/xgplayer-mp4-loader/src/utils.js b/packages/xgplayer-mp4-loader/src/utils.js index 3fe988a60..02a0bf572 100644 --- a/packages/xgplayer-mp4-loader/src/utils.js +++ b/packages/xgplayer-mp4-loader/src/utils.js @@ -1,5 +1,20 @@ +function isEdtsApplicable () { + let flag = true + const userAgent = navigator.userAgent || '' + const isChrome = /Chrome/gi.test(userAgent) && !/Edge\//gi.test(userAgent) -export function moovToSegments (moov, duration) { + // M75+ 开始支持负的 dts + // https://bugs.chromium.org/p/chromium/issues/detail?id=398141 + if (isChrome) { + const result = userAgent.match(/Chrome\/(\d+)/i) + const chromeVersion = result ? parseInt(result[1], 10) : 0 + flag = !!chromeVersion && chromeVersion >= 75 + } + return flag +} + +export function moovToSegments (moov, config) { + const { segmentDuration } = config const tracks = moov.trak if (!tracks || !tracks.length) return const videoTrack = tracks.find(t => t.mdia?.hdlr?.handlerType === 'vide') @@ -11,33 +26,70 @@ export function moovToSegments (moov, duration) { let segmentDurations if (videoTrack) { - const videoStbl = videoTrack.mdia?.minf?.stbl - if (!videoStbl) return - const timescale = videoTrack.mdia.mdhd?.timescale - const { stts, stsc, stsz, stco, stss, ctts } = videoStbl - if (!timescale || !stts || !stsc || !stsz || !stco || !stss) return - videoSegments = getSegments(duration, timescale, stts, stsc, stsz, stco, stss, ctts) + videoSegments = getSegments('video', videoTrack, segmentDuration, config) segmentDurations = videoSegments.map(x => x.duration) } if (audioTrack) { - const audioStbl = audioTrack.mdia?.minf?.stbl - if (!audioStbl) return - const timescale = audioTrack.mdia.mdhd?.timescale - const { stts, stsc, stsz, stco } = audioStbl - if (!timescale || !stts || !stsc || !stsz || !stco) return - 
audioSegments = getSegments(duration, timescale, stts, stsc, stsz, stco, null, null, segmentDurations) + audioSegments = getSegments( + 'audio', + audioTrack, + segmentDuration, + config, + segmentDurations, + videoSegments + ) } - return { videoSegments, audioSegments } } -function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts, segmentDurations) { +function getSegments ( + type, + track, + segDuration, + config, + segmentDurations = [], + videoSegments +) { + const { fixEditListOffset, fixEditListOffsetThreshold, audioGroupingStrategy } = config + const stbl = track.mdia?.minf?.stbl + if (!stbl) { + return [] + } + + const timescale = track.mdia.mdhd?.timescale + const { stts, stsc, stsz, stco, stss, ctts } = stbl + if (!timescale || !stts || !stsc || !stsz || !stco || (type === 'video' && !stss)) { + return [] + } + + // chrome等浏览器内核为了修复B帧引入的CTS偏移时间,对于edts->elst box中的media_time进行了参考 + // 目前chrome仅读取media_time,不支持编辑列表的其他用途,因为它们不常见并且由更高级的协议提供更好的服务。 + // 如果不参考editList信息,一些视频会有音画不同步问题 + let editListOffset = 0 + const editList = track.edts?.elst?.entries + if ( + fixEditListOffset && + isEdtsApplicable() && + Array.isArray(editList) && + editList.length > 0 + ) { + const media_time = editList[0].media_time + const maxAllowedTime = fixEditListOffsetThreshold + ? 
fixEditListOffsetThreshold * timescale + : 5 * timescale + if (media_time > 0 && media_time < maxAllowedTime) { + editListOffset = media_time + } + } + const frames = [] const gop = [] const gopDuration = [] + let gopMinPtsArr = [] // 记录每个gop中最小的pts,用于计算每个gop的startTime + let gopMaxPtsFrameIdxArr = [] // 记录每个gop中最大的pts,用于计算每个gop的endTime const stscEntries = stsc.entries const stcoEntries = stco.entries const stszEntrySizes = stsz.entrySizes @@ -55,26 +107,40 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts let keyframeMap if (stssEntries) { keyframeMap = {} - stssEntries.forEach(x => { keyframeMap[x - 1] = true }) + stssEntries.forEach(x => { + keyframeMap[x - 1] = true + }) } let frame let duration - let startTime = 0 + // let startTime = 0 let pos = 0 let chunkIndex = 0 let chunkRunIndex = 0 let offsetInChunk = 0 - let lastSampleInChunk = stscEntries[0].samplesPerChunk - let lastChunkInRun = stscEntries[1] ? stscEntries[1].firstChunk - 1 : Infinity + let lastSampleInChunk = stscEntries.length > 0 ? stscEntries[0].samplesPerChunk : 0 + let lastChunkInRun = stscEntries.length > 1 && stscEntries[1] ? 
stscEntries[1].firstChunk - 1 : Infinity let dts = 0 let gopId = -1 + let editListApplied = false + + if (cttsArr?.length > 0 && editListOffset > 0) { + // 参考chromium原生播放时,ffmpeg_demuxer处理edts后的逻辑: + // FFmpeg将所有AVPacket dts值根据editListOffset进行偏移,以确保解码器有足够的解码时间(即保持cts不变,dts从负值开始) + // FFmpeg对于音频的AVPacket dts/pts虽然也进行了偏移,但在chromium中最后给到decoder时又将其偏移修正回0 + // 因此,这里的逻辑是为了触发baseMediaDecodeTime变化,并且只修正视频,不处理音频 + dts -= editListOffset + editListApplied = true + } + + track.editListApplied = editListApplied + stts.entries.forEach(({ count, delta }) => { - duration = delta // / timescale + duration = delta // in timescale for (let i = 0; i < count; i++) { frame = { dts, - startTime, duration, size: stszEntrySizes[pos] || stsz.sampleSize, offset: stcoEntries[chunkIndex] + offsetInChunk, @@ -95,11 +161,32 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts if (cttsArr && pos < cttsArr.length) { frame.pts = dts + cttsArr[pos] } - if (pos === 0) { + if (editListOffset === 0 && pos === 0) { frame.pts = 0 } + // 补足音频的pts + if (frame.pts === undefined) { + frame.pts = frame.dts + } + // 更新当前gop中最小的pts + if (frame.keyframe) { + gopMinPtsArr.push(frame.pts) + } else { + if (frame.pts < gopMinPtsArr[gop.length - 1]) { + gopMinPtsArr[gop.length - 1] = frame.pts + } + } + // 更新当前gop中最大的pts + if (frame.keyframe) { + gopMaxPtsFrameIdxArr.push(frame.index) + } else if (gop.length > 0 && gopMaxPtsFrameIdxArr[gop.length - 1] !== undefined) { + const curMaxPts = frames[gopMaxPtsFrameIdxArr[gop.length - 1]]?.pts + if (curMaxPts !== undefined && frame.pts > curMaxPts) { + gopMaxPtsFrameIdxArr[gop.length - 1] = frame.index + } + } frames.push(frame) - startTime += duration + dts += delta pos++ @@ -110,7 +197,9 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts offsetInChunk = 0 if (chunkIndex >= lastChunkInRun) { chunkRunIndex++ - lastChunkInRun = stscEntries[chunkRunIndex + 1] ? 
stscEntries[chunkRunIndex + 1].firstChunk - 1 : Infinity + lastChunkInRun = stscEntries[chunkRunIndex + 1] + ? stscEntries[chunkRunIndex + 1].firstChunk - 1 + : Infinity } lastSampleInChunk += stscEntries[chunkRunIndex].samplesPerChunk } @@ -118,27 +207,44 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts }) const l = frames.length - if (!l || (stss && !frames[0].keyframe)) return + if (!l || (stss && !frames[0].keyframe)) { + return [] + } const segments = [] let segFrames = [] let time = 0 let lastFrame let adjust = 0 - const pushSegment = (duration) => { + let segMinPts = 0 + let segMaxPtsFrame = 0 + const pushSegment = (duration, startGopIdx, endGopIdx) => { lastFrame = segFrames[segFrames.length - 1] + segMinPts = gopMinPtsArr[startGopIdx] + segMaxPtsFrame = frames[gopMaxPtsFrameIdxArr[endGopIdx]] + // 因为强制把视频第一帧的pts改为0 ,所以第一个gop的时长可能和endTime - startTime对应不上 + // 需要修正下,不然音频根据视频gop时长截取的第一个关键帧起始的误差较大 + if (segments.length === 0) { + const diff = segMaxPtsFrame.pts + segMaxPtsFrame.duration - segMinPts + duration = diff / timescale + } segments.push({ index: segments.length, - startTime: (segments[segments.length - 1]?.endTime || segFrames[0].startTime / timescale), - endTime: (lastFrame.startTime + lastFrame.duration) / timescale, + startTime: segMinPts / timescale, // (segments[segments.length - 1]?.endTime || segFrames[0].startTime / timescale), + endTime: (segMaxPtsFrame.pts + segMaxPtsFrame.duration) / timescale, duration: duration, - range: [segFrames[0].offset, lastFrame.offset + lastFrame.size], + range: [segFrames[0].offset, lastFrame.offset + lastFrame.size - 1], frames: segFrames }) - time = 0 + + if (audioGroupingStrategy !== 1) { + time = 0 + } + segFrames = [] } + let segGopStartIdx = 0 if (stss) { const duration = segDuration * timescale for (let i = 0, l = gop.length; i < l; i++) { @@ -146,23 +252,71 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts segFrames.push(...gop[i]) if (i 
+ 1 < l) { if (i === 0 || time > duration) { - pushSegment(time / timescale) + pushSegment(time / timescale, segGopStartIdx, i) + segGopStartIdx = i + 1 } } else { - pushSegment(time / timescale) + pushSegment(time / timescale, segGopStartIdx, i) + segGopStartIdx = i + 1 } } } else { - segmentDurations = segmentDurations || [] + gopMinPtsArr = [] + gopMaxPtsFrameIdxArr = [] let duration = segmentDurations[0] || segDuration - for (let i = 0; i < l; i++) { - segFrames.push(frames[i]) - time += frames[i].duration - const curTime = time / timescale - if (i + 1 >= l || curTime + adjust >= duration) { - adjust += curTime - duration - pushSegment(curTime) - duration = segmentDurations[segments.length] || segDuration + + if (audioGroupingStrategy === 1) { + for (let i = 0, nextEndTime; i < l; i++) { + const curFrame = frames[i] + const nextFrame = frames[i + 1] + const isFinalFrame = i === l - 1 + segFrames.push(curFrame) + time += curFrame.duration + const curEndTime = nextEndTime || time / timescale + // 这里使用下一帧的目的是将每个分组的起始音频帧应该覆盖或包含GOP的开始时间, + // MSE在remove buffer时会将gop结束时间点的那个音频帧删掉,这个策略就是为了 + // 防止之后再添加新的Coded Frame Group时由于缺少了一帧音频容易产生Buffer gap + nextEndTime = (nextFrame ? time + nextFrame.duration : 0) / timescale + if ( + isFinalFrame || + ( + videoSegments[segments.length] + ? 
nextEndTime > videoSegments[segments.length].endTime /* 有视频帧,使用GOP时间戳进行分割 */ + : nextEndTime - segFrames[0].pts / timescale >= duration /* 无视频帧(包含音频帧大于视频时长的剩余音频帧分组的场景),使用配置的切片时间或最后一个GOP时长进行分割 */ + ) + ) { + gopMinPtsArr.push(segFrames[0].pts) + gopMaxPtsFrameIdxArr.push(segFrames[segFrames.length - 1].index) + pushSegment(curEndTime, segments.length, segments.length) + duration = segmentDurations[segments.length] || segDuration + } + } + } else { + for (let i = 0, nextEndTime; i < l; i++) { + const curFrame = frames[i] + const nextFrame = frames[i + 1] + const isFinalFrame = i === l - 1 + segFrames.push(curFrame) + time += curFrame.duration + const curEndTime = nextEndTime || time / timescale + nextEndTime = (nextFrame ? time + nextFrame.duration : 0) / timescale + if ( + isFinalFrame || + // 这里使用下一帧的目的是将每个分组的起始音频帧应该覆盖或包含GOP的开始时间, + // MSE在remove buffer时会将gop结束时间点的那个音频帧删掉,这个策略就是为了 + // 防止之后再添加新的Coded Frame Group时由于缺少了一帧音频容易产生Buffer gap + nextEndTime + adjust >= duration + ) { + if (audioGroupingStrategy === 2) { + adjust += time / timescale - duration + } else { + adjust += nextEndTime - duration + } + gopMinPtsArr.push(segFrames[0].pts) + gopMaxPtsFrameIdxArr.push(segFrames[segFrames.length - 1].index) + pushSegment(curEndTime, segments.length, segments.length) + duration = segmentDurations[segments.length] || segDuration + } } } } @@ -234,3 +388,17 @@ export function moovToMeta (moov) { export function isNumber (n) { return typeof n === 'number' && !Number.isNaN(n) } + + +export function isSegmentsOk (segments) { + if (!segments) { + return false + } + const {audioSegments , videoSegments} = segments + const v = !videoSegments || videoSegments.length === 0 + const a = !audioSegments || audioSegments.length === 0 + if (v && a) { + return false + } + return true +} \ No newline at end of file diff --git a/packages/xgplayer-transmuxer/src/codec/ExpGolomb.js b/packages/xgplayer-transmuxer/src/codec/ExpGolomb.js new file mode 100644 index 000000000..8660f0646 --- 
/dev/null +++ b/packages/xgplayer-transmuxer/src/codec/ExpGolomb.js @@ -0,0 +1,131 @@ +export default class ExpGolomb { + _bytesAvailable + + _bitsAvailable = 0 + + _word = 0 + + constructor (data) { + if (!data) throw new Error('ExpGolomb data params is required') + this._data = data + this._bytesAvailable = data.byteLength + if (this._bytesAvailable) this._loadWord() + } + + _loadWord () { + const position = this._data.byteLength - this._bytesAvailable + const availableBytes = Math.min(4, this._bytesAvailable) + if (availableBytes === 0) throw new Error('No bytes available') + + const workingBytes = new Uint8Array(4) + workingBytes.set(this._data.subarray(position, position + availableBytes)) + + this._word = new DataView(workingBytes.buffer).getUint32(0) + this._bitsAvailable = availableBytes * 8 + this._bytesAvailable -= availableBytes + } + + bitsPos () { + return this._bytesAvailable * 8 - this._bitsAvailable + } + + bitsLeft () { + return this._data.length * 8 - this.bitsPos() + } + + byteAligned () { + return this.bitsPos() === 0 || (this.bitsPos() % 8 === 0) + } + + skipBits (count) { + if (this._bitsAvailable > count) { + this._word <<= count + this._bitsAvailable -= count + } else { + count -= this._bitsAvailable + const skipBytes = Math.floor(count / 8) + count -= (skipBytes * 8) + this._bytesAvailable -= skipBytes + this._loadWord() + this._word <<= count + this._bitsAvailable -= count + } + } + + readBits (size) { + if (size > 32) { + throw new Error('Cannot read more than 32 bits') + } + + let bits = Math.min(this._bitsAvailable, size) + const val = this._word >>> (32 - bits) + + this._bitsAvailable -= bits + if (this._bitsAvailable > 0) { + this._word <<= bits + } else if (this._bytesAvailable > 0) { + this._loadWord() + } + + bits = size - bits + if (bits > 0 && this._bitsAvailable) { + return (val << bits) | this.readBits(bits) + } + return val + } + + skipLZ () { + let leadingZeroCount + for ( + leadingZeroCount = 0; + leadingZeroCount < 
this._bitsAvailable; + ++leadingZeroCount + ) { + if ((this._word & (0x80000000 >>> leadingZeroCount)) !== 0) { + this._word <<= leadingZeroCount + this._bitsAvailable -= leadingZeroCount + return leadingZeroCount + } + } + this._loadWord() + return leadingZeroCount + this.skipLZ() + } + + skipUEG () { + this.skipBits(1 + this.skipLZ()) + } + + readUEG () { + const clz = this.skipLZ() + return this.readBits(clz + 1) - 1 + } + + readEG () { + const val = this.readUEG() + if (1 & val) { + return (1 + val) >>> 1 + } + return -1 * (val >>> 1) + } + + readBool () { + return this.readBits(1) === 1 + } + + readUByte () { + return this.readBits(8) + } + + skipScalingList (count) { + let lastScale = 8 + let nextScale = 8 + let deltaScale + for (let j = 0; j < count; j++) { + if (nextScale !== 0) { + deltaScale = this.readEG() + nextScale = (lastScale + deltaScale + 256) % 256 + } + lastScale = nextScale === 0 ? lastScale : nextScale + } + } +} diff --git a/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js b/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js index 15921b45a..1193e8fdd 100644 --- a/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js +++ b/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js @@ -55,7 +55,7 @@ export class FMP4Demuxer { if (videoTrack.id == k) { tracks[k].map(x => { x.offset += baseOffset - const sample = new VideoSample((x.pts || x.dts) + videoBaseMediaDecodeTime, x.dts + videoBaseMediaDecodeTime) + const sample = new VideoSample((typeof x.pts === 'number' ? 
x.pts : x.dts) + videoBaseMediaDecodeTime, x.dts + videoBaseMediaDecodeTime) sample.duration = x.duration sample.gopId = x.gopId if (x.keyframe) sample.setToKeyframe() diff --git a/packages/xgplayer-transmuxer/src/mp4/mp4-demuxer.js b/packages/xgplayer-transmuxer/src/mp4/mp4-demuxer.js index 49d168610..d1ae58592 100644 --- a/packages/xgplayer-transmuxer/src/mp4/mp4-demuxer.js +++ b/packages/xgplayer-transmuxer/src/mp4/mp4-demuxer.js @@ -63,7 +63,7 @@ export class MP4Demuxer { } startByte = sample.offset - dataStart sampleData = data.subarray(startByte, startByte + sample.size) - frame = new VideoSample(sample.pts || sample.dts, sample.dts) + frame = new VideoSample(typeof sample.pts === 'number' ? sample.pts : sample.dts, sample.dts) frame.duration = sample.duration frame.gopId = sample.gopId if (sample.keyframe) frame.setToKeyframe() @@ -154,7 +154,7 @@ export class MP4Demuxer { startByte = sample.offset - dataStart videoEndByte = startByte + sample.size sampleData = data.subarray(startByte, videoEndByte) - frame = new VideoSample(sample.pts || sample.dts, sample.dts) + frame = new VideoSample(typeof sample.pts === 'number' ? 
sample.pts : sample.dts, sample.dts) frame.duration = sample.duration // this.log.debug('[video !!!!!!!!],frame,index,', sample.index, ',segmentIdx', segmentIdx, ', dataStart,', dataStart, ',dataEnd', end, ',Samplestart,', sample.offset, ', SampleEnd,', sample.offset + sample.size, ',size,', sample.size, 'dts,', sample.dts, ',pts,', sample.pts, ', keyframe', sample.keyframe) frame.gopId = sample.gopId diff --git a/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js b/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js index 4e84af5e9..fad4dd709 100644 --- a/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js +++ b/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js @@ -1,6 +1,7 @@ -import { AudioCodecType, VideoCodecType } from '../model' -import { getAvcCodec, readBig16, readBig24, readBig32, readBig64 } from '../utils' import { AAC } from '../codec' +import { AudioCodecType, VideoCodecType } from '../model' +import { getAvcCodec, readBig16, readBig24, readBig32, readBig64, readInt32, readInt64 } from '../utils' + export class MP4Parser { static findBox (data, names, start = 0) { const ret = [] @@ -142,6 +143,7 @@ export class MP4Parser { return parseBox(box, false, (ret, data, start) => { ret.tkhd = MP4Parser.tkhd(MP4Parser.findBox(data, ['tkhd'], start)[0]) ret.mdia = MP4Parser.mdia(MP4Parser.findBox(data, ['mdia'], start)[0]) + ret.edts = MP4Parser.edts(MP4Parser.findBox(data, ['edts'], start)[0]) }) } @@ -170,6 +172,41 @@ export class MP4Parser { }) } + static edts (box) { + return parseBox(box, false, (ret, data, start) => { + ret.elst = MP4Parser.elst(MP4Parser.findBox(data, ['elst'], start)[0]) + }) + } + + static elst (box) { + return parseBox(box, true, (ret, data, start) => { + ret.entries = [] + ret.entriesData = data + let offset = 0 + const entry_count = readBig32(data, offset) + offset += 4 + for (let i = 0; i < entry_count; i++) { + const entry = {} + ret.entries.push(entry) + if (ret.version === 1) { + entry.segment_duration = readBig64(data, offset) + 
offset += 8 + entry.media_time = readInt64(data, offset) + offset += 8 + } else { + entry.segment_duration = readBig32(data, offset) + offset += 4 + entry.media_time = readInt32(data, offset) + offset += 4 + } + entry.media_rate_integer = readBig16(data, offset) + offset += 2 + entry.media_rate_fraction = readBig16(data, offset) + offset += 2 + } + }) + } + static mdhd (box) { return parseBox(box, true, (ret, data) => { let start = 0 @@ -767,6 +804,10 @@ export class MP4Parser { v.mvhdTimecale = moov.mvhd.timescale v.timescale = v.formatTimescale = vTrack.mdia.mdhd.timescale v.duration = vTrack.mdia.mdhd.duration || (v.mvhdDurtion / v.mvhdTimecale * v.timescale) + if (vTrack.edts?.elst) { + v.editList = vTrack.edts.elst + v.editListApplied = vTrack.editListApplied + } const e1 = vTrack.mdia.minf.stbl.stsd.entries[0] v.width = e1.width v.height = e1.height @@ -815,6 +856,10 @@ export class MP4Parser { a.mvhdTimecale = moov.mvhd.timescale a.timescale = a.formatTimescale = aTrack.mdia.mdhd.timescale a.duration = aTrack.mdia.mdhd.duration || (a.mvhdDurtion / a.mvhdTimecale * a.timescale) + if (aTrack.edts?.elst) { + a.editList = aTrack.edts.elst + a.editListApplied = aTrack.editListApplied + } const e1 = aTrack.mdia.minf.stbl.stsd.entries[0] a.sampleSize = e1.sampleSize a.sampleRate = e1.sampleRate diff --git a/packages/xgplayer-transmuxer/src/mp4/mp4.js b/packages/xgplayer-transmuxer/src/mp4/mp4.js index 89e6780ff..c41b92a35 100644 --- a/packages/xgplayer-transmuxer/src/mp4/mp4.js +++ b/packages/xgplayer-transmuxer/src/mp4/mp4.js @@ -12,6 +12,8 @@ export class MP4 { 'hvcC', 'dinf', 'dref', + 'edts', + 'elst', 'esds', 'ftyp', 'hdlr', @@ -165,6 +167,15 @@ export class MP4 { return ret } + static FullBox (type, version, flags, ...payload) { + return MP4.box(type, new Uint8Array([ + version, + (flags >> 16) & 0xff, + (flags >> 8) & 0xff, + flags & 0xff + ]), ...payload) + } + static ftyp (tracks) { const isHevc = tracks.find(t => t.type === TrackType.VIDEO && t.codecType 
=== VideoCodecType.HEVC) return isHevc ? MP4.FTYPHEV1 : MP4.FTYPAVC1 @@ -259,6 +270,7 @@ export class MP4 { const trak = MP4.box( MP4.types.trak, MP4.tkhd(track.id, track.tkhdDuration || 0, track.width, track.height), + // track.editList ? MP4.edts(track.editList) : undefined, MP4.mdia(track) ) // console.log('[remux],trak, len,', trak.byteLength, track.id, hashVal(trak.toString())) @@ -295,6 +307,14 @@ export class MP4 { return tkhd } + static edts (elstData) { + return MP4.box(MP4.types.edts, MP4.elst(elstData)) + } + + static elst ({entries, entriesData, version}) { + return MP4.FullBox(MP4.types.elst, version, 0, entriesData) + } + static mdia (track) { const mdia = MP4.box(MP4.types.mdia, MP4.mdhd(track.duration, track.timescale), MP4.hdlr(track.type), MP4.minf(track)) // console.log('[remux],mdia, len,', mdia.byteLength, hashVal(mdia.toString())) @@ -353,7 +373,7 @@ export class MP4 { content = MP4.encv(track) // console.log('[remux],encv, len,', content.byteLength, track.type, hashVal(content.toString())) } else { - content = MP4.avc1hev1(track) + content = MP4.avc1hev1vvc1(track) // console.log('[remux],avc1hev1, len,', content.byteLength, track.type, hashVal(content.toString())) } const stsd = MP4.box(MP4.types.stsd, new Uint8Array([ @@ -493,10 +513,19 @@ export class MP4 { return MP4.box(MP4.types.sinf, content, MP4.box(MP4.types.frma, frma), MP4.box(MP4.types.schm, schm), schi) } - static avc1hev1 (track) { - const isHevc = track.codecType === VideoCodecType.HEVC - const typ = isHevc ? MP4.types.hvc1 : MP4.types.avc1 - const config = isHevc ? MP4.hvcC(track) : MP4.avcC(track) + static avc1hev1vvc1 (track) { + let config + let typ + if (track.codecType === VideoCodecType.HEVC) { + config = MP4.hvcC(track) + typ = MP4.types.hvc1 + } else { + config = MP4.avcC(track) + typ = MP4.types.avc1 + } + // const isHevc = track.codecType === VideoCodecType.HEVC + // const typ = isHevc ? MP4.types.hvc1 : MP4.types.avc1 + // const config = isHevc ? 
MP4.hvcC(track) : MP4.avcC(track) const boxes = [ new Uint8Array([ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // reserved @@ -524,7 +553,7 @@ export class MP4 { ] // console.log('[remux],avc1hev1_0, len,', boxes[0].byteLength, hashVal(boxes[0].toString())) // console.log('[remux],avc1hev1_1, len,', boxes[1].byteLength, hashVal(boxes[1].toString())) - if (isHevc) { + if (track.codecType === VideoCodecType.HEVC) { boxes.push(MP4.box(MP4.types.fiel, new Uint8Array([0x01, 0x00]))) // console.log('[remux],fiel, len,', boxes[2].byteLength, hashVal(boxes[2].toString())) } else if (track.sarRatio && track.sarRatio.length > 1) { diff --git a/packages/xgplayer-transmuxer/src/utils/index.js b/packages/xgplayer-transmuxer/src/utils/index.js index 70d16a3e6..c74660c7c 100644 --- a/packages/xgplayer-transmuxer/src/utils/index.js +++ b/packages/xgplayer-transmuxer/src/utils/index.js @@ -29,10 +29,20 @@ export function readBig32 (data, i = 0) { return (data[i] << 24 >>> 0) + (data[i + 1] << 16) + (data[i + 2] << 8) + (data[i + 3] || 0) } +export function readInt32 (data, i = 0) { + const dv = new DataView(data.buffer, data.byteOffset, data.byteLength) + return dv.getInt32(i) +} + export function readBig64 (data, i = 0) { return readBig32(data, i) * MAX_SIZE + readBig32(data, i + 4) } +export function readInt64 (data, i = 0) { + const dv = new DataView(data.buffer, data.byteOffset, data.byteLength) + return dv.getInt32(i) * 4294967296 + dv.getUint32(i + 4) +} + export function getAvcCodec (codecs) { let codec = 'avc1.' 
let h diff --git a/yarn.lock b/yarn.lock index e7cc8c469..4e3d3fe2a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6360,6 +6360,15 @@ xgplayer-mp4-loader@0.0.1: xgplayer-streaming-shared "3.0.0-next.2-1" xgplayer-transmuxer "3.0.0-next.2-1" +xgplayer-mp4-loader@3.0.11-alpha.10: + version "3.0.11-alpha.10" + resolved "https://registry.npmjs.org/xgplayer-mp4-loader/-/xgplayer-mp4-loader-3.0.11-alpha.10.tgz#9d3e64d47a66f05c7b88b202241f662600fbb3c3" + integrity sha512-8PlqOmPEMoq3I92vTKRlO+jIQkPbcLIgNj0xLXcT1AiaiJIUQU5v0wTp0eueBonB/8g99wCBV2OGwBa0ECJMDg== + dependencies: + eventemitter3 "^4.0.7" + xgplayer-streaming-shared "3.0.11-alpha.10" + xgplayer-transmuxer "3.0.11-alpha.10" + xgplayer-streaming-shared@3.0.0-next.2-1: version "3.0.0-next.2-1" resolved "https://registry.yarnpkg.com/xgplayer-streaming-shared/-/xgplayer-streaming-shared-3.0.0-next.2-1.tgz#357cd2d160f3f12a21dacd8a8fadb7f598639166" @@ -6392,6 +6401,15 @@ xgplayer-transmuxer@3.0.0-next.8: "@babel/runtime" "^7.15.3" core-js "3" +xgplayer-transmuxer@3.0.11-alpha.10: + version "3.0.11-alpha.10" + resolved "https://registry.npmjs.org/xgplayer-transmuxer/-/xgplayer-transmuxer-3.0.11-alpha.10.tgz#8f4a461f672b11107d07e46d98b77fc2490227e8" + integrity sha512-QbUkF4m1KXt+JOeTwM0AO/VZN6g697thGZjYMPkoXjxIU4I0F8lybc46Ng+0bZn2aKci54gAE660xsyiHFEUkA== + dependencies: + "@babel/runtime" "^7.15.3" + concat-typed-array "^1.0.2" + crypto-es "^1.2.4" + xml-name-validator@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-4.0.0.tgz#79a006e2e63149a8600f15430f0a4725d1524835"