Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiplication is expensive #325

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 17 additions & 18 deletions ViroRenderer/VROBoneUBO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ VROBoneUBO::VROBoneUBO(std::shared_ptr<VRODriverOpenGL> driver) :
// Adreno + OVR)
VROBonesData data;
VROMatrix4f identity;
for (int i = 0; i < kMaxBones; i++) {
memcpy(&data.bone_transforms[i * kFloatsPerBone], identity.getArray(), kFloatsPerBone * sizeof(float));
for (int i = 0, iFPB = 0; i < kMaxBones; i++, iFPB += kFloatsPerBone) {
memcpy(&data.bone_transforms[iFPB], identity.getArray(), kFloatsPerBone * sizeof(float));
}
GL( glBindBuffer(GL_UNIFORM_BUFFER, _bonesUBO) );
GL( glBufferData(GL_UNIFORM_BUFFER, sizeof(VROBonesData), &data, GL_DYNAMIC_DRAW) );
Expand All @@ -136,11 +136,11 @@ void VROBoneUBO::update(const std::shared_ptr<VROSkinner> &skinner) {

VROBonesData data;
int numBones = skinner->getSkeleton()->getNumBones();
for (int i = 0; i < numBones; i++) {
for (int i = 0, iFPB = 0; i < numBones; i++, iFPB += kFloatsPerBone) {
if (i >= kMaxBones) {
break;
}

VROMatrix4f transform = skinner->getModelTransform(i);
if (kDualQuaternionEnabled) {
VROVector3f translation = transform.extractTranslation();
Expand All @@ -155,22 +155,21 @@ void VROBoneUBO::update(const std::shared_ptr<VROSkinner> &skinner) {
VROQuaternion real = dq.getReal();
VROQuaternion dual = dq.getDual();

int floatsPerBone = kFloatsPerBone;
data.bone_transforms[i * floatsPerBone + 0] = real.X;
data.bone_transforms[i * floatsPerBone + 1] = real.Y;
data.bone_transforms[i * floatsPerBone + 2] = real.Z;
data.bone_transforms[i * floatsPerBone + 3] = real.W;
data.bone_transforms[i * floatsPerBone + 4] = dual.X;
data.bone_transforms[i * floatsPerBone + 5] = dual.Y;
data.bone_transforms[i * floatsPerBone + 6] = dual.Z;
data.bone_transforms[i * floatsPerBone + 7] = dual.W;
data.bone_transforms[i * floatsPerBone + 8] = scale.x;
data.bone_transforms[i * floatsPerBone + 9] = scale.y;
data.bone_transforms[i * floatsPerBone + 10] = scale.z;
data.bone_transforms[i * floatsPerBone + 11] = 1.0;
data.bone_transforms[iFPB + 0] = real.X;
data.bone_transforms[iFPB + 1] = real.Y;
data.bone_transforms[iFPB + 2] = real.Z;
data.bone_transforms[iFPB + 3] = real.W;
data.bone_transforms[iFPB + 4] = dual.X;
data.bone_transforms[iFPB + 5] = dual.Y;
data.bone_transforms[iFPB + 6] = dual.Z;
data.bone_transforms[iFPB + 7] = dual.W;
data.bone_transforms[iFPB + 8] = scale.x;
data.bone_transforms[iFPB + 9] = scale.y;
data.bone_transforms[iFPB + 10] = scale.z;
data.bone_transforms[iFPB + 11] = 1.0;
}
else {
memcpy(&data.bone_transforms[i * kFloatsPerBone], transform.getArray(), kFloatsPerBone * sizeof(float));
memcpy(&data.bone_transforms[iFPB], transform.getArray(), kFloatsPerBone * sizeof(float));
}
}

Expand Down
18 changes: 10 additions & 8 deletions ViroRenderer/VROBox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,14 +254,16 @@ void VROBox::buildBoxVAR(VROShapeVertexLayout *vertexLayout) {
};

for (int i = 0; i < kNumBoxVertices; i++) {
vertexLayout[i].x = cubeVertices[i * 3 + 0];
vertexLayout[i].y = cubeVertices[i * 3 + 1];
vertexLayout[i].z = cubeVertices[i * 3 + 2];
vertexLayout[i].u = cubeTex[i * 2 + 0];
vertexLayout[i].v = cubeTex[i * 2 + 1];
vertexLayout[i].nx = cubeNormals[i * 3 + 0];
vertexLayout[i].ny = cubeNormals[i * 3 + 1];
vertexLayout[i].nz = cubeNormals[i * 3 + 2];
int i2 = i << 1;
int i3 = i + i2;
vertexLayout[i].x = cubeVertices[i3 + 0];
vertexLayout[i].y = cubeVertices[i3 + 1];
vertexLayout[i].z = cubeVertices[i3 + 2];
vertexLayout[i].u = cubeTex[i2 + 0];
vertexLayout[i].v = cubeTex[i2 + 1];
vertexLayout[i].nx = cubeNormals[i3 + 0];
vertexLayout[i].ny = cubeNormals[i3 + 1];
vertexLayout[i].nz = cubeNormals[i3 + 2];
}
}

Expand Down
24 changes: 12 additions & 12 deletions ViroRenderer/VROGLTFLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1390,11 +1390,11 @@ void VROGLTFLoader::processTangent(std::vector<std::shared_ptr<VROGeometryElemen
int sizeOfTangent = 4;
int vertexSize = pos->getVertexCount();
float *dataOut = new float[vertexSize * sizeOfTangent];
for (int i = 0; i < vertexSize; i ++ ) {
dataOut[(i * sizeOfTangent)] = generatedTangents[i].x;
dataOut[(i * sizeOfTangent) + 1] = generatedTangents[i].y;
dataOut[(i * sizeOfTangent) + 2] = generatedTangents[i].z;
dataOut[(i * sizeOfTangent) + 3] = generatedTangents[i].w;
for (int i = 0, j = 0; i < vertexSize; i++, j += sizeOfTangent) {
dataOut[j] = generatedTangents[i].x;
dataOut[j + 1] = generatedTangents[i].y;
dataOut[j + 2] = generatedTangents[i].z;
dataOut[j + 3] = generatedTangents[i].w;
}

int sizeOfSingleTangent = getTypeSize(GLTFType::Vec4) * getComponentTypeSize(GLTFTypeComponent::Float);
Expand Down Expand Up @@ -1795,7 +1795,7 @@ std::shared_ptr<VROGeometrySource> VROGLTFLoader::buildBoneWeightSource(GLTFType
int sizeOfSingleBoneWeight = getTypeSize(gType) * getComponentTypeSize(gTypeComponent);
float *dataOut = new float[sizeOfSingleBoneWeight * gAttributeAccesor.count]();
for (int elementIndex = 0; elementIndex < gAttributeAccesor.count; elementIndex++) {

int elementIndex4 = elementIndex << 2;
// For the current element, cycle through each of its float or type component
// and convert them into a float through the math conversions required by gLTF.
buffer.setPosition(elementIndex * bufferViewStride);
Expand All @@ -1806,11 +1806,11 @@ std::shared_ptr<VROGeometrySource> VROGLTFLoader::buildBoneWeightSource(GLTFType
weight.push_back(floatData);
} else if (gTypeComponent == GLTFTypeComponent::UnsignedByte) {
unsigned uByteData = buffer.readUnsignedByte();
float point = uByteData / 255.0;
float point = uByteData / 255.0F;
weight.push_back(point);
} else if (gTypeComponent == GLTFTypeComponent::UnsignedShort) {
unsigned short uShortData = buffer.readUnsignedShort();
float point = uShortData / 65535.0;
float point = uShortData / 65535.0F;
weight.push_back(point);
} else {
perr("Invalid weighted bone data provided for the 3D glTF skinner.");
Expand All @@ -1821,10 +1821,10 @@ std::shared_ptr<VROGeometrySource> VROGLTFLoader::buildBoneWeightSource(GLTFType
float totalWeight = weight[0] + weight[1] + weight[2] + weight[3];
VROVector4f normalizedWeight;
VROVector4f(weight[0], weight[1], weight[2], weight[3]).scale(1/totalWeight, &normalizedWeight);
dataOut[(elementIndex * 4)] = normalizedWeight.x;
dataOut[(elementIndex * 4) + 1] = normalizedWeight.y;
dataOut[(elementIndex * 4) + 2] = normalizedWeight.z;
dataOut[(elementIndex * 4) + 3] = normalizedWeight.w;
dataOut[elementIndex4] = normalizedWeight.x;
dataOut[elementIndex4 + 1] = normalizedWeight.y;
dataOut[elementIndex4 + 2] = normalizedWeight.z;
dataOut[elementIndex4 + 3] = normalizedWeight.w;
}

// Finally create our geometry sources with the normalized data.
Expand Down
4 changes: 2 additions & 2 deletions ViroRenderer/VROGeometryElement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ void VROGeometryElement::processIndices(std::function<void (int, int)> function)

int indexCount = _primitiveCount * 3;

for (int i = 0; i < indexCount; i++) {
buffer.setPosition(i * _bytesPerIndex);
for (int i = 0, o = 0; i < indexCount; i++, o += _bytesPerIndex) {
buffer.setPosition(o);
int idx;
if (_bytesPerIndex == 2) {
idx = _signed ? buffer.readShort() : buffer.readUnsignedShort();
Expand Down
12 changes: 6 additions & 6 deletions ViroRenderer/VROGeometrySource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ void VROGeometrySource::processVertices(std::function<void (int, VROVector4f)> f
std::shared_ptr<VROData> data = getData();
VROByteBuffer buffer(data->getData(), data->getDataLength(), false);

for (int i = 0; i < _vertexCount; i++) {
buffer.setPosition(i * _dataStride + _dataOffset);
for (int i = 0, o = _dataOffset; i < _vertexCount; i++, o += _dataStride) {
buffer.setPosition(o);

float x = 0, y = 0, z = 0, w = 0;

Expand Down Expand Up @@ -139,9 +139,9 @@ void VROGeometrySource::processVertices(std::function<void (int, VROVector4f)> f
void VROGeometrySource::modifyVertices(std::function<VROVector3f(int index, VROVector3f vertex)> function) const {
std::shared_ptr<VROData> data = getData();
VROByteBuffer buffer(data->getData(), data->getDataLength(), false);
for (int i = 0; i < _vertexCount; i++) {
buffer.setPosition(i * _dataStride + _dataOffset);

for (int i = 0, o = _dataOffset; i < _vertexCount; i++, o += _dataStride) {
buffer.setPosition(o);

float x = 0, y = 0, z = 0, w = 0;

Expand Down Expand Up @@ -228,7 +228,7 @@ void VROGeometrySource::modifyVertices(std::function<VROVector3f(int index, VROV

VROVector3f result = function(i, { x, y, z});

buffer.setPosition(i * _dataStride + _dataOffset);
buffer.setPosition(o);
if (_floatComponents) {
if (_bytesPerComponent == 2) {
if (_componentsPerVertex > 0) {
Expand Down
8 changes: 4 additions & 4 deletions ViroRenderer/VROHDRLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ std::shared_ptr<VROTexture> VROHDRLoader::loadTexture(float *data, int width, in
if (kCompressHDR) {
int packedLength = numPixels * sizeof(uint32_t);
uint32_t *packedF9E5 = (uint32_t *) malloc(packedLength);
for (int i = 0; i < numPixels; i++) {
float r = data[i * componentsPerPixel + 0];
float g = data[i * componentsPerPixel + 1];
float b = data[i * componentsPerPixel + 2];
for (int i = 0, j = 0; i < numPixels; i++, j += componentsPerPixel) {
float r = data[j];
float g = data[j + 1];
float b = data[j + 2];
// alpha is disregarded

const glm::vec3 v(r, g, b);
Expand Down
5 changes: 3 additions & 2 deletions ViroRenderer/VROLightingUBO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,12 @@ void VROLightingUBO::updateLightsVertex() {
continue;
}
int i = vertexData.num_lights;
int iFPM = i * kFloatsPerMatrix;

const VROMatrix4f &shadowView = light->getShadowViewMatrix();
memcpy(&vertexData.shadow_view_matrices[i * kFloatsPerMatrix], shadowView.getArray(), kFloatsPerMatrix * sizeof(float));
memcpy(&vertexData.shadow_view_matrices[iFPM], shadowView.getArray(), kFloatsPerMatrix * sizeof(float));
const VROMatrix4f &shadowProjection = light->getShadowProjectionMatrix();
memcpy(&vertexData.shadow_projection_matrices[i * kFloatsPerMatrix], shadowProjection.getArray(), kFloatsPerMatrix * sizeof(float));
memcpy(&vertexData.shadow_projection_matrices[iFPM], shadowProjection.getArray(), kFloatsPerMatrix * sizeof(float));

vertexData.num_lights++;
if (vertexData.num_lights >= kMaxLights) {
Expand Down
35 changes: 18 additions & 17 deletions ViroRenderer/VROMatrix4f.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,11 @@ VROMatrix4f::VROMatrix4f(const float *matrix) {

VROMatrix4f::VROMatrix4f(const glm::mat4x4 mat) {
for (int i = 0; i < 4; i++) {
_mtx[i * 4 + 0] = mat[i].x;
_mtx[i * 4 + 1] = mat[i].y;
_mtx[i * 4 + 2] = mat[i].z;
_mtx[i * 4 + 3] = mat[i].w;
int i4 = i << 2;
_mtx[i4 + 0] = mat[i].x;
_mtx[i4 + 1] = mat[i].y;
_mtx[i4 + 2] = mat[i].z;
_mtx[i4 + 3] = mat[i].w;
}
}

Expand Down Expand Up @@ -90,7 +91,7 @@ void VROMatrix4f::rotateX(float angleRad) {
float rcos = sincosr[1];

for (int i = 0; i < 3; i++) {
int i1 = i * 4 + 1;
int i1 = (i << 2) + 1;
int i2 = i1 + 1;
float t = _mtx[i1];
_mtx[i1] = t * rcos - _mtx[i2] * rsin;
Expand All @@ -106,10 +107,10 @@ void VROMatrix4f::rotateY(float angleRad) {
float rcos = sincosr[1];

for (int i = 0; i < 3; i++) {
int i0 = i * 4;
int i2 = i0 + 2;
float t = _mtx[i0];
_mtx[i0] = t * rcos + _mtx[i2] * rsin;
int i4 = i << 2;
int i2 = i4 + 2;
float t = _mtx[i4];
_mtx[i4] = t * rcos + _mtx[i2] * rsin;
_mtx[i2] = _mtx[i2] * rcos - t * rsin;
}
}
Expand All @@ -122,10 +123,10 @@ void VROMatrix4f::rotateZ(float angleRad) {
float rcos = sincosr[1];

for (int i = 0; i < 3; i++) {
int i0 = i * 4;
int i1 = i0 + 1;
float t = _mtx[i0];
_mtx[i0] = t * rcos - _mtx[i1] * rsin;
int i4 = i << 2;
int i1 = i4 + 1;
float t = _mtx[i4];
_mtx[i4] = t * rcos - _mtx[i1] * rsin;
_mtx[i1] = t * rsin + _mtx[i1] * rcos;
}
}
Expand Down Expand Up @@ -199,10 +200,10 @@ void VROMatrix4f::translate(const VROVector3f &vector) {

void VROMatrix4f::scale(float x, float y, float z) {
for (int i = 0; i < 3; i++) {
int i0 = i * 4;
_mtx[i0] *= x;
_mtx[i0 + 1] *= y;
_mtx[i0 + 2] *= z;
int i4 = i << 2;
_mtx[i4] *= x;
_mtx[i4 + 1] *= y;
_mtx[i4 + 2] *= z;
}
}

Expand Down
44 changes: 24 additions & 20 deletions ViroRenderer/VROMorpher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -699,12 +699,12 @@ std::shared_ptr<VROGeometrySource> VROMorpher::convertVecToGeoSource(

float *sourcesData = (dataVecIn.size() > 0) ? new float[dataVecIn.size() * componentsPerVertex] : nullptr;
int morphSize = (int) dataVecIn.size();
for (int i = 0; i < morphSize; i ++) {
sourcesData[ i * componentsPerVertex] = dataVecIn[i].x;
sourcesData[(i * componentsPerVertex) + 1] = dataVecIn[i].y;
sourcesData[(i * componentsPerVertex) + 2] = dataVecIn[i].z;
if (componentsPerVertex > 3){
sourcesData[(i * componentsPerVertex) + 3] = dataVecIn[i].w;
for (int i = 0, j = 0; i < morphSize; i++, j += componentsPerVertex) {
sourcesData[j] = dataVecIn[i].x;
sourcesData[j + 1] = dataVecIn[i].y;
sourcesData[j + 2] = dataVecIn[i].z;
if (componentsPerVertex > 3) {
sourcesData[j + 3] = dataVecIn[i].w;
}
}
std::shared_ptr<VROData> data = std::make_shared<VROData>((void *) sourcesData, dataVecIn.size() * componentsPerVertex * sizeof(float));
Expand Down Expand Up @@ -776,9 +776,10 @@ inline void VROMorpher::addWeightedMorphToSrc3(float *srcDataOut,
float weight) {
int morphSize = (int) morphData.size();
for (int i = 0; i < morphSize; i ++) {
srcDataOut[i * 3] += (morphData[i].x * (weight));
srcDataOut[(i * 3) + 1] += (morphData[i].y * (weight));
srcDataOut[(i * 3) + 2] += (morphData[i].z * (weight));
int i3 = i * 3;
srcDataOut[i3] += (morphData[i].x * (weight));
srcDataOut[i3 + 1] += (morphData[i].y * (weight));
srcDataOut[i3 + 2] += (morphData[i].z * (weight));
}
}

Expand All @@ -787,26 +788,29 @@ inline void VROMorpher::addWeightedMorphToSrc4(float *srcDataOut,
float weight) {
int morphSize = (int) morphData.size();
for (int i = 0; i < morphSize; i ++) {
srcDataOut[i * 4] += (morphData[i].x * (weight));
srcDataOut[(i * 4) + 1] += (morphData[i].y * (weight));
srcDataOut[(i * 4) + 2] += (morphData[i].z * (weight));
srcDataOut[(i * 4) + 3] += (morphData[i].z * (weight));
int i4 = i << 2;
srcDataOut[i4] += (morphData[i].x * (weight));
srcDataOut[i4 + 1] += (morphData[i].y * (weight));
srcDataOut[i4 + 2] += (morphData[i].z * (weight));
srcDataOut[i4 + 3] += (morphData[i].z * (weight));
}
}

inline void VROMorpher::resetSrc3(float *srcDataOut, int size) {
for (int i = 0; i < size; i ++) {
srcDataOut[i * 3] = 0;
srcDataOut[(i * 3) + 1] = 0;
srcDataOut[(i * 3) + 2] = 0;
int i3 = i * 3;
srcDataOut[i3] = 0;
srcDataOut[i3 + 1] = 0;
srcDataOut[i3 + 2] = 0;
}
}

inline void VROMorpher::resetSrc4(float *srcDataOut, int size) {
for (int i = 0; i < size; i ++) {
srcDataOut[i * 4] = 0;
srcDataOut[(i * 4) + 1] = 0;
srcDataOut[(i * 4) + 2] = 0;
srcDataOut[(i * 4) + 3] = 0;
int i4 = i << 2;
srcDataOut[i4] = 0;
srcDataOut[i4 + 1] = 0;
srcDataOut[i4 + 2] = 0;
srcDataOut[i4 + 3] = 0;
}
}
13 changes: 7 additions & 6 deletions ViroRenderer/VROParticleUBO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,17 @@ int VROParticleUBO::bindDrawData(int currentDrawCallIndex) {
// Parse / serialize the data into the uniform buffer object
VROParticlesUBOVertexData vertexData;
VROParticlesUBOFragmentData fragmentData;
for (int i = start; i < end; i++) {
for (int i = start, i0 = 0; i < end; i++, i0++) {
int i1 = i0 << 2;
const float *transformArray = _lastKnownParticles[i].currentWorldTransform.getArray();
memcpy(&vertexData.particles_transform[(i - start) * kMaxFloatsPerTransform],
memcpy(&vertexData.particles_transform[i0 * kMaxFloatsPerTransform],
transformArray,
kMaxFloatsPerTransform * sizeof(float));

fragmentData.frag_particles_color[(i - start) * 4 + 0] = _lastKnownParticles[i].colorCurrent.x;
fragmentData.frag_particles_color[(i - start) * 4 + 1] = _lastKnownParticles[i].colorCurrent.y;
fragmentData.frag_particles_color[(i - start) * 4 + 2] = _lastKnownParticles[i].colorCurrent.z;
fragmentData.frag_particles_color[(i - start) * 4 + 3] = _lastKnownParticles[i].colorCurrent.w;
fragmentData.frag_particles_color[i1 + 0] = _lastKnownParticles[i].colorCurrent.x;
fragmentData.frag_particles_color[i1 + 1] = _lastKnownParticles[i].colorCurrent.y;
fragmentData.frag_particles_color[i1 + 2] = _lastKnownParticles[i].colorCurrent.z;
fragmentData.frag_particles_color[i1 + 3] = _lastKnownParticles[i].colorCurrent.w;
}

// Finally bind the UBO to its corresponding buffers.
Expand Down