Skip to content

Commit

Permalink
Optimise software subtitle rendering by reducing allocations & avoiding FPU (#8)
Browse files Browse the repository at this point in the history

Can be tested by

setfps 60
anim s0_2,"no64",0
flush 1
bgmplay2 58,100,1
lsp s0_5,"*8"
flush 1
click
*again
click
goto *again

goto *boot_logo

Co-authored-by: vit9696 <[email protected]>
  • Loading branch information
vit9696 and vit9696 authored Jun 2, 2020
1 parent 5a23bb1 commit 57fcc70
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 69 deletions.
37 changes: 8 additions & 29 deletions Engine/Layers/Subtitle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,36 +234,15 @@ void SubtitleLayer::doDecoding() {
SubtitleFrame frame;
frame.start_timestamp = decoded_timestamp;

try {
if (subtitleDriver.extractFrame(frame.imgs, decoded_timestamp / 1000000)) {
frameReady = true;
}
} catch (ASS_Image *img) {
sendToLog(LogLevel::Warn, "Falling back to software renderer, this will be slow\n");

ASS_Image *img;
int changed = subtitleDriver.extractFrame(frame.imgs, decoded_timestamp / 1000000, &img);
if (changed >= 0) {
frameReady = changed > 0;
} else {
const size_t frame_size = width * height * 4;
auto premultiplied_frame = std::make_unique<float[]>(frame_size);

// Use surface if needed
if (subtitleDriver.blendBufInNeed(premultiplied_frame.get(), width, height, current_frame_format, decoded_timestamp / 1000000, img)) {
auto byte_frame = std::make_unique<uint8_t[]>(frame_size);
float *float_ptr = premultiplied_frame.get();
uint8_t *byte_ptr = byte_frame.get();

for (unsigned int y = 0; y < height; y++) {
ptrdiff_t line = 4 * y * width;
for (unsigned int x = 0; x < width; x++) {
ptrdiff_t pos = line + 4 * x;
byte_ptr[pos + 0] = float_ptr[pos + 0] * 255;
byte_ptr[pos + 1] = float_ptr[pos + 1] * 255;
byte_ptr[pos + 2] = float_ptr[pos + 2] * 255;
byte_ptr[pos + 3] = float_ptr[pos + 3] * 255;
}
}

frame.sw_buffer = std::move(byte_frame);
frameReady = true;
}
frame.sw_buffer = std::make_unique<uint8_t[]>(frame_size);
subtitleDriver.blendBufInNeed(frame.sw_buffer.get(), width, current_frame_format, img);
frameReady = true;
}

if (frameReady) {
Expand Down
73 changes: 36 additions & 37 deletions Engine/Media/SubtitleDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ static void ass_yuv_blend(uint8_t *planes[4], size_t planesCnt, AVPixelFormat fo
}
}

static void ass_pregpu_blend(float *frame, size_t linesize, int format, ASS_Image *img) {
static void ass_pregpu_blend(uint8_t *frame, size_t linesize, int format, ASS_Image *img) {
//int cnt = 0;

off_t r_off = 2;
Expand All @@ -168,14 +168,14 @@ static void ass_pregpu_blend(float *frame, size_t linesize, int format, ASS_Imag
ons.errorAndExit("Unsupported texture format");

while (img) {
float opacity = (255 - ((img->color) & 0xFF)) / 65025.0f;
float r = (img->color >> 24) / 255.0f;
float g = ((img->color >> 16) & 0xFF) / 255.0f;
float b = ((img->color >> 8) & 0xFF) / 255.0f;
uint8_t opacity = (255 - ((img->color) & 0xFF));
uint8_t r = (img->color >> 24);
uint8_t g = ((img->color >> 16) & 0xFF);
uint8_t b = ((img->color >> 8) & 0xFF);
const int32_t Bpp = 4;

uint8_t *src;
float *dst;
uint8_t *dst;

src = img->bitmap;
dst = frame + img->dst_y * linesize * Bpp + img->dst_x * Bpp;
Expand All @@ -186,20 +186,28 @@ static void ass_pregpu_blend(float *frame, size_t linesize, int format, ASS_Imag

// GPU_FUNC_ONE, GPU_FUNC_ONE_MINUS_SRC_ALPHA

float a_ = src[x] * opacity;
float r_ = r * a_;
float g_ = g * a_;
float b_ = b * a_;
uint8_t front[4];
front[a_off] = static_cast<uint32_t>(src[x]) * opacity / 255;
front[r_off] = static_cast<uint32_t>(r) * front[a_off] / 255;
front[g_off] = static_cast<uint32_t>(g) * front[a_off] / 255;
front[b_off] = static_cast<uint32_t>(b) * front[a_off] / 255;

uint8_t *back = &dst[x * Bpp];

if (front[a_off] == 0xFF) {
memcpy(back, front, sizeof (front));
continue;
}

dst[x * Bpp + r_off] *= 1 - a_;
dst[x * Bpp + b_off] *= 1 - a_;
dst[x * Bpp + g_off] *= 1 - a_;
dst[x * Bpp + a_off] *= 1 - a_;
auto blend = [](uint8_t back, uint8_t front, uint8_t inv_front_opacity) {
return front + (inv_front_opacity * back) / 0xFF;
};

dst[x * Bpp + r_off] += r_;
dst[x * Bpp + b_off] += b_;
dst[x * Bpp + g_off] += g_;
dst[x * Bpp + a_off] += a_;
back[r_off] = blend(back[r_off], front[r_off], (0xFF - front[a_off]));
back[g_off] = blend(back[g_off], front[g_off], (0xFF - front[a_off]));
back[b_off] = blend(back[b_off], front[b_off], (0xFF - front[b_off]));
if (back[a_off] != 0xFF)
back[a_off] = blend (back[a_off], front[a_off], (0xFF - front[a_off]));
}
src += img->stride;
dst += linesize * Bpp;
Expand Down Expand Up @@ -314,25 +322,11 @@ bool SubtitleDriver::blendInNeed(SDL_Surface *surface, uint64_t timestamp) {
return changed;
}

bool SubtitleDriver::blendBufInNeed(float *buffer, size_t width, size_t /*height*/, int format, uint64_t timestamp, ASS_Image *img) {
int changed{0};

if (!img) {
Lock lock(ass_track);
img = ass_render_frame(ass_renderer, ass_track, timestamp, &changed);
} else {
changed = 1;
}

if (changed && img && img->w > 0 && img->h > 0) {
ass_pregpu_blend(buffer, width, format, img);
return true;
}

return changed >= 1;
// Blends `img` into `buffer` by forwarding to ass_pregpu_blend().
// `width` is handed over as that function's `linesize` argument, which it
// multiplies by 4 bytes per pixel to step from one destination row to the
// next; `format` is passed through unchanged.
void SubtitleDriver::blendBufInNeed(uint8_t *buffer, size_t width, int format, ASS_Image *img) {
ass_pregpu_blend(buffer, width, format, img);
}

bool SubtitleDriver::extractFrame(std::vector<SubtitleImage> &images, uint64_t timestamp) {
int SubtitleDriver::extractFrame(std::vector<SubtitleImage> &images, uint64_t timestamp, ASS_Image **imgptr) {
ASS_Image *img{nullptr};
int changed{0};

Expand All @@ -341,11 +335,16 @@ bool SubtitleDriver::extractFrame(std::vector<SubtitleImage> &images, uint64_t t
img = ass_render_frame(ass_renderer, ass_track, timestamp, &changed);
}

if (changed == 0)
return 0;

bool fits;
size_t num = countImages(img, fits);

if (!fits || num > NIMGS_MAX)
throw img;
if (!fits || num > NIMGS_MAX) {
*imgptr = img;
return -1;
}

images.reserve(num);

Expand Down
4 changes: 2 additions & 2 deletions Engine/Media/SubtitleDriver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ class SubtitleDriver {
bool blendOn(SDL_Surface *surface, uint64_t timestamp);
bool blendOn(uint8_t *planes[4], size_t planesCnt, AVPixelFormat format, int linesize[AV_NUM_DATA_POINTERS], int height, uint64_t timestamp);
bool blendInNeed(SDL_Surface *surface, uint64_t timestamp);
bool blendBufInNeed(float *buffer, size_t width, size_t height, int format, uint64_t timestamp, ASS_Image *img = nullptr);
bool extractFrame(std::vector<SubtitleImage> &images, uint64_t timestamp); /* Extracts all Ass_Image`s for this timestamp */
void blendBufInNeed(uint8_t *buffer, size_t width, int format, ASS_Image *img);
int extractFrame(std::vector<SubtitleImage> &images, uint64_t timestamp, ASS_Image **imgptr); /* Extracts all Ass_Image`s for this timestamp */
private:
ASS_Library *ass_library{nullptr}; // libass library (handle)
ASS_Renderer *ass_renderer{nullptr}; // libass renderer
Expand Down
2 changes: 1 addition & 1 deletion ONScripter.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -1879,7 +1879,7 @@
1C0A5F5717478DCF004A9BCC /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 1140;
LastUpgradeCheck = 1150;
TargetAttributes = {
1C9F26D51ADB0E7900138E84 = {
CreatedOnToolsVersion = 6.3;
Expand Down

0 comments on commit 57fcc70

Please sign in to comment.