Skip to content

Commit d12e43b

Browse files
cdtwigg authored and meta-codesync[bot] committed
Add Python bindings for CameraProjectionErrorFunction (#1039)
Summary:
Pull Request resolved: #1039

Expose CameraProjectionErrorFunctionT<float> to the pymomentum.solver2 module. This error function projects 3D skeleton points through a bone-parented camera using an IntrinsicsModel and penalizes reprojection error in pixel space. The camera can be attached to a skeleton joint or be static in world space (camera_parent=-1).

Bindings include the CameraProjectionConstraint data class, single/batch constraint addition, and constraint management methods. The solver2 module now imports pymomentum.camera so pybind11 can resolve IntrinsicsModel types across modules.

Reviewed By: jeongseok-meta
Differential Revision: D93160003
fbshipit-source-id: b169a4ff225a688f3d4b7d32abd0aa1cf148098d
1 parent 23c8bcb commit d12e43b

File tree

6 files changed

+358
-18
lines changed

6 files changed

+358
-18
lines changed

pymomentum/CMakeLists.txt

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ mt_python_binding(
267267
${ATEN_INCLUDE_DIR}
268268
${TORCH_INCLUDE_DIRS}
269269
LINK_LIBRARIES
270+
camera
270271
character
271272
character_solver
272273
character_sequence_solver
@@ -312,17 +313,17 @@ mt_python_binding(
312313
${TORCH_CXX_FLAGS}
313314
)
314315

315-
if(MOMENTUM_BUILD_RENDERER)
316-
mt_python_binding(
317-
NAME pymomentum_camera
318-
MODULE_NAME camera
319-
PYMOMENTUM_HEADERS_VARS camera_public_headers
320-
PYMOMENTUM_SOURCES_VARS camera_sources
321-
LINK_LIBRARIES
322-
camera
323-
rasterizer
324-
)
316+
mt_python_binding(
317+
NAME pymomentum_camera
318+
MODULE_NAME camera
319+
PYMOMENTUM_HEADERS_VARS camera_public_headers
320+
PYMOMENTUM_SOURCES_VARS camera_sources
321+
LINK_LIBRARIES
322+
camera
323+
fmt::fmt-header-only
324+
)
325325

326+
if(MOMENTUM_BUILD_RENDERER)
326327
mt_python_binding(
327328
NAME renderer
328329
PYMOMENTUM_HEADERS_VARS renderer_public_headers

pymomentum/camera/camera_pybind.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*/
77

88
#include <momentum/camera/camera.h>
9-
#include <momentum/rasterizer/rasterizer.h>
9+
#include <momentum/simd/simd.h>
1010

1111
#include <fmt/format.h>
1212
#include <pybind11/eigen.h>
@@ -101,20 +101,17 @@ The returned model is mutable and can be modified for optimization purposes.
101101
auto res_acc = result.mutable_unchecked<2>();
102102
auto pts_acc = points.unchecked<2>();
103103

104-
for (py::ssize_t i = 0; i < points.shape(0);
105-
i += momentum::rasterizer::kSimdPacketSize) {
106-
momentum::rasterizer::FloatP px, py, pz;
104+
for (py::ssize_t i = 0; i < points.shape(0); i += momentum::kSimdPacketSize) {
105+
momentum::FloatP px, py, pz;
107106
auto nPtsCur = std::min(
108-
static_cast<py::ssize_t>(momentum::rasterizer::kSimdPacketSize),
109-
points.shape(0) - i);
107+
static_cast<py::ssize_t>(momentum::kSimdPacketSize), points.shape(0) - i);
110108
for (py::ssize_t k = 0; k < nPtsCur; ++k) {
111109
px[k] = pts_acc(i + k, 0);
112110
py[k] = pts_acc(i + k, 1);
113111
pz[k] = pts_acc(i + k, 2);
114112
}
115113

116-
const auto [res, valid] =
117-
intrinsics.project(momentum::rasterizer::Vector3fP(px, py, pz));
114+
const auto [res, valid] = intrinsics.project(momentum::Vector3fP(px, py, pz));
118115

119116
for (py::ssize_t k = 0; k < nPtsCur; ++k) {
120117
res_acc(i + k, 0) = res.x()[k];

pymomentum/doc/conf.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,20 @@
2323
]
2424

2525
html_theme = "sphinx_rtd_theme"
26+
27+
28+
def skip_pycapsule(app, what, name, obj, skip, options):
    """Skip PyCapsule members to avoid RST warnings from their docstrings.

    PyCapsule is an internal CPython/pybind11 type used for cross-module type
    sharing. Its __init__ docstring contains '*' characters that trigger RST
    parsing warnings. exclude-members only hides it from output but doesn't
    prevent docstring parsing; this event fires earlier and skips it entirely.

    The arguments are the ones passed to ``autodoc-skip-member`` handlers;
    only ``name`` is inspected here.

    :return: ``True`` when the member is named ``PyCapsule`` (skip it),
        otherwise ``None`` so that autodoc's default decision and any other
        registered handlers still apply.
    """
    if name == "PyCapsule":
        return True
    # Deliberately not ``return skip``: any non-None result is treated as a
    # final decision and can pre-empt other autodoc-skip-member handlers.
    return None
39+
40+
41+
def setup(app):
42+
app.connect("autodoc-skip-member", skip_pycapsule)

pymomentum/solver2/solver2_error_functions.cpp

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <momentum/character/skeleton.h>
1010
#include <momentum/character/skeleton_state.h>
1111
#include <momentum/character_solver/aim_error_function.h>
12+
#include <momentum/character_solver/camera_projection_error_function.h>
1213
#include <momentum/character_solver/collision_error_function.h>
1314
#include <momentum/character_solver/distance_error_function.h>
1415
#include <momentum/character_solver/fixed_axis_error_function.h>
@@ -36,6 +37,7 @@
3637
#include <pybind11/pybind11.h>
3738
#include <pybind11/stl.h>
3839
#include <Eigen/Core>
40+
#include <variant>
3941

4042
namespace py = pybind11;
4143
namespace mm = momentum;
@@ -1652,6 +1654,216 @@ source joints respectively.)")
16521654
"Returns the number of constraints.");
16531655
}
16541656

1657+
// Registers the Python bindings for the camera projection error function on module `m`.
//
// Two classes are exposed:
//   - CameraProjectionConstraint: read-only view of mm::ProjectionConstraintT<float>
//     (parent, offset, weight, target).
//   - CameraProjectionErrorFunction: wraps mm::CameraProjectionErrorFunctionT<float> with a
//     constructor accepting either a Camera or an IntrinsicsModel, plus methods to add,
//     clear, count and list constraints.
void defCameraProjectionErrorFunction(py::module_& m) {
  py::class_<mm::ProjectionConstraintT<float>>(
      m, "CameraProjectionConstraint", "Read-only access to a camera projection constraint.")
      .def(
          "__repr__",
          [](const mm::ProjectionConstraintT<float>& self) {
            return fmt::format(
                "CameraProjectionConstraint(parent={}, weight={}, offset=[{:.3f}, {:.3f}, {:.3f}], target=[{:.3f}, {:.3f}])",
                self.parent,
                self.weight,
                self.offset.x(),
                self.offset.y(),
                self.offset.z(),
                self.target.x(),
                self.target.y());
          })
      .def_readonly("parent", &mm::ProjectionConstraintT<float>::parent, "The parent joint index.")
      .def_readonly(
          "offset",
          &mm::ProjectionConstraintT<float>::offset,
          "The offset from the parent joint in local space.")
      .def_readonly(
          "weight", &mm::ProjectionConstraintT<float>::weight, "The weight of the constraint.")
      .def_readonly(
          "target", &mm::ProjectionConstraintT<float>::target, "The target 2D pixel position.");

  py::class_<
      mm::CameraProjectionErrorFunctionT<float>,
      mm::SkeletonErrorFunction,
      std::shared_ptr<mm::CameraProjectionErrorFunctionT<float>>>(
      m,
      "CameraProjectionErrorFunction",
      R"(Projects 3D skeleton points through a bone-parented camera using an IntrinsicsModel
and penalizes reprojection error in pixel space.

The camera is rigidly attached to a skeleton joint (camera_parent) with a fixed offset
(camera_offset). If camera_parent is None, the camera is static in world space and
camera_offset is used directly as the eye-from-world transform.)")
      .def(
          "__repr__",
          [](const mm::CameraProjectionErrorFunctionT<float>& self) {
            return fmt::format(
                "CameraProjectionErrorFunction(weight={}, num_constraints={})",
                self.getWeight(),
                self.numConstraints());
          })
      .def(
          py::init<>(
              [](const mm::Character& character,
                 std::variant<mm::Camera, std::shared_ptr<const mm::IntrinsicsModel>>
                     cameraOrIntrinsics,
                 std::optional<int> cameraParent,
                 std::optional<Eigen::Matrix4f> cameraOffset,
                 float weight) -> std::shared_ptr<mm::CameraProjectionErrorFunctionT<float>> {
                validateWeight(weight, "weight");

                // No parent given: pass kInvalidIndex to the error function.
                size_t parent = mm::kInvalidIndex;
                if (cameraParent.has_value()) {
                  validateJointIndex(*cameraParent, "camera_parent", character.skeleton);
                  parent = static_cast<size_t>(*cameraParent);
                }

                std::shared_ptr<const mm::IntrinsicsModel> intrinsicsModel;
                Eigen::Affine3f offset;
                if (auto* camera = std::get_if<mm::Camera>(&cameraOrIntrinsics)) {
                  // Full camera: take its intrinsics and eye-from-world transform, with the
                  // optional camera_offset applied on the left.
                  intrinsicsModel = camera->intrinsicsModel();
                  if (cameraOffset.has_value()) {
                    Eigen::Affine3f additionalOffset;
                    additionalOffset.matrix() = *cameraOffset;
                    offset = additionalOffset * camera->eyeFromWorld();
                  } else {
                    offset = camera->eyeFromWorld();
                  }
                } else {
                  // Bare intrinsics: camera_offset (identity when omitted) is the transform.
                  intrinsicsModel =
                      std::get<std::shared_ptr<const mm::IntrinsicsModel>>(cameraOrIntrinsics);
                  if (!intrinsicsModel) {
                    throw std::invalid_argument("intrinsics_model must not be None.");
                  }
                  offset.matrix() = cameraOffset.value_or(Eigen::Matrix4f::Identity());
                }

                auto result = std::make_shared<mm::CameraProjectionErrorFunctionT<float>>(
                    character.skeleton,
                    character.parameterTransform,
                    std::move(intrinsicsModel),
                    parent,
                    offset);
                result->setWeight(weight);
                return result;
              }),
          R"(Initialize a CameraProjectionErrorFunction.

The second argument can be either a :class:`pymomentum.camera.Camera` or a
:class:`pymomentum.camera.IntrinsicsModel` (e.g.
:class:`pymomentum.camera.PinholeIntrinsicsModel`).

When a :class:`pymomentum.camera.Camera` is provided, its intrinsics and
eye-from-world transform are used. If ``camera_offset`` is also provided, it
is composed as an additional transform: ``camera_offset @ camera.eye_from_world``.

When a :class:`pymomentum.camera.IntrinsicsModel` is provided, the
``camera_offset`` parameter specifies the eye-from-world transform (defaults
to identity if not provided).

:param character: The character to use.
:param camera: A :class:`pymomentum.camera.Camera` or
    :class:`pymomentum.camera.IntrinsicsModel` (e.g.
    :class:`pymomentum.camera.PinholeIntrinsicsModel`).
:param camera_parent: The joint index that the camera is attached to, or
    None for a static camera in world space.
:param camera_offset: A 4x4 transformation matrix. When used with a
    :class:`pymomentum.camera.Camera`, this is composed as an additional
    transform on top of the camera's eye-from-world. When used with a
    :class:`pymomentum.camera.IntrinsicsModel`, this is the eye-from-world
    transform directly.
:param weight: The weight applied to the error function.)",
          py::keep_alive<1, 2>(),
          py::keep_alive<1, 3>(),
          py::arg("character"),
          py::arg("camera"),
          py::kw_only(),
          py::arg("camera_parent") = py::none(),
          py::arg("camera_offset") = py::none(),
          py::arg("weight") = 1.0f)
      .def(
          "add_constraint",
          [](mm::CameraProjectionErrorFunctionT<float>& self,
             int parent,
             const Eigen::Vector2f& target,
             const std::optional<Eigen::Vector3f>& offset,
             float weight) {
            validateJointIndex(parent, "parent", self.getSkeleton());
            validateWeight(weight, "weight");
            mm::ProjectionConstraintT<float> constraint;
            constraint.parent = parent;
            constraint.target = target;
            constraint.offset = offset.value_or(Eigen::Vector3f::Zero());
            constraint.weight = weight;
            self.addConstraint(constraint);
          },
          R"(Adds a camera projection constraint.

:param parent: The index of the parent joint.
:param target: The 2D target pixel position.
:param offset: The offset from the parent joint in local space.
:param weight: The weight of the constraint.)",
          py::arg("parent"),
          py::arg("target"),
          py::arg("offset") = std::nullopt,
          py::arg("weight") = 1.0f)
      .def(
          "add_constraints",
          [](mm::CameraProjectionErrorFunctionT<float>& self,
             const py::array_t<int>& parent,
             const py::array_t<float>& target,
             const std::optional<py::array_t<float>>& offset,
             const std::optional<py::array_t<float>>& weight) {
            // All arrays must agree on the leading n_cons dimension.
            ArrayShapeValidator validator;
            const int nConsIdx = -1;
            validator.validate(parent, "parent", {nConsIdx}, {"n_cons"});
            validator.validate(target, "target", {nConsIdx, 2}, {"n_cons", "xy"});
            validateJointIndex(parent, "parent", self.getSkeleton());
            validator.validate(offset, "offset", {nConsIdx, 3}, {"n_cons", "xyz"});
            validator.validate(weight, "weight", {nConsIdx}, {"n_cons"});

            auto parentAcc = parent.unchecked<1>();
            auto targetAcc = target.unchecked<2>();
            auto offsetAcc =
                offset.has_value() ? std::make_optional(offset->unchecked<2>()) : std::nullopt;
            auto weightAcc =
                weight.has_value() ? std::make_optional(weight->unchecked<1>()) : std::nullopt;

            // Read the count up front so the loop below does not query the array again.
            const py::ssize_t nCons = parent.shape(0);

            py::gil_scoped_release release;

            // Append through addConstraint(), as add_constraint does, so that this method
            // adds to the constraints already stored. The previous setConstraints() call,
            // going by its name, replaced them, which contradicts "add_constraints".
            for (py::ssize_t i = 0; i < nCons; ++i) {
              mm::ProjectionConstraintT<float> constraint;
              constraint.parent = parentAcc(i);
              constraint.target = Eigen::Vector2f(targetAcc(i, 0), targetAcc(i, 1));
              constraint.offset = offsetAcc.has_value()
                  ? Eigen::Vector3f((*offsetAcc)(i, 0), (*offsetAcc)(i, 1), (*offsetAcc)(i, 2))
                  : Eigen::Vector3f::Zero();
              constraint.weight = weightAcc.has_value() ? (*weightAcc)(i) : 1.0f;
              self.addConstraint(constraint);
            }
          },
          R"(Adds multiple camera projection constraints.

:param parent: A numpy array of size n for the indices of the parent joints.
:param target: A numpy array of shape (n, 2) for the 2D target pixel positions.
:param offset: A numpy array of shape (n, 3) for the offsets from the parent joints in local space.
:param weight: A numpy array of size n for the weights of the constraints.)",
          py::arg("parent"),
          py::arg("target"),
          py::arg("offset") = std::nullopt,
          py::arg("weight") = std::nullopt)
      .def(
          "clear_constraints",
          &mm::CameraProjectionErrorFunctionT<float>::clearConstraints,
          "Clears all camera projection constraints.")
      .def(
          "num_constraints",
          &mm::CameraProjectionErrorFunctionT<float>::numConstraints,
          "Returns the number of constraints.")
      .def_property_readonly(
          "constraints",
          &mm::CameraProjectionErrorFunctionT<float>::getConstraints,
          "Returns the list of camera projection constraints.");
}
1866+
16551867
} // namespace
16561868

16571869
void addErrorFunctions(py::module_& m) {
@@ -2698,6 +2910,9 @@ rotation matrix to a target rotation.)")
26982910
// Projection error function
26992911
defProjectionErrorFunction(m);
27002912

2913+
// Camera projection error function
2914+
defCameraProjectionErrorFunction(m);
2915+
27012916
// Vertex Projection error function
27022917
defVertexProjectionErrorFunction(m);
27032918

pymomentum/solver2/solver2_pybind.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ PYBIND11_MODULE(solver2, m) {
117117

118118
pybind11::module_::import(
119119
"pymomentum.geometry"); // @dep=fbsource//arvr/libraries/pymomentum:geometry
120+
pybind11::module_::import("pymomentum.camera"); // @dep=fbsource//arvr/libraries/pymomentum:camera
120121

121122
// Error functions:
122123
py::class_<mm::SkeletonErrorFunction, std::shared_ptr<mm::SkeletonErrorFunction>>(

0 commit comments

Comments
 (0)