// This module is a replacement for pyexiv2 in the mirage image viewer. // // pyexiv2 is a Python 2 binding to the exiv2, a popular library for reading // EXIF, IPTC and XMP metadata from image files. mirage 1.0_pre1 and later uses // it to get some basic metadata like image orientation, camera model and some // other tags related to picture-taking. This library is not required, but is // nice to have. The problem is that pyexiv2 depends on an outdated version of // exiv2, so it can't be compiled with the latest one (0.27.2). I had the // following choices: // // 1. Use libgexiv2, it provides Python bindings through PyGObject. The problem // is that PyGObject uses GTK3 and mirage is written with GTK2 (PyGTK), this // would require porting the entire application to GTK3 and this is not a // very straigh-forward solution. // 2. Use py3exiv2 which supports only Python 3. But PyGTK does not support // Python 3. So again, I'd need to port mirage to GTK3 and this has the // exact same problems that solution #1 has. // 3. Use GObject 2 which works on Python 2. However, libgexiv2 doesn't provide // bindings for GObject 2. // 4. Patch pyexiv2 so that it can be compiled with modern exiv2. This is a // very hard task for me since I don't have much experience with C/C++, // Boost.Python and Python internals. And nobody needs this binding nowadays // anyway. // 5. Create a replacement bindings for exiv2 just for this application. I // tried to do this, but quickly abandoned this idea because C++ is just too // hard for me. // // Then I examined the source code of mirage and noticed that it only uses EXIF // metadata and ignores IPTC and XMP. I quickly found a popular C library for // parsing EXIF called libexif. "Why not use a Python library?" you might ask. // Well, for two reasons: // // a) I was packaging mirage for latest version of mirage (1.0_pre2) for AUR // (Arch User Repository) and decided to use a well-known library which can // be found in official Arch Linux repositories and is used by other popular // applications. libexif is a perfect candidate because it is in the 'extra' // repo and is used by GIMP, PCManFM, Thunar, Ristretto and (indirectly) // Mono. // b) libexif is written in C and I wrote C for Arduino in the past, plus I // have a lot of experience with Rust. It ain't much, but I wanted to learn // something new. // // So, I decided to write a Python extension in C that is a binding to libexif // that allows you only to read metadata of an image file (because that's the // only feature mirage needs). // Useful Links: // https://libexif.github.io/api/index.html // https://github.com/libexif/libexif // https://docs.python.org/2/extending/extending.html // https://docs.python.org/2/c-api/concrete.html // https://pythonextensionpatterns.readthedocs.io/en/latest/refcount.html // https://www.exif.org/Exif2-2.PDF #include #include // Converts a numeric value stored in an EXIF entry to a Python object. Returns // NULL If a non-numeric format is provided. PyObject* mirage_exif_number_to_py(const unsigned char* ptr, ExifFormat format, ExifByteOrder bo) { PyObject* py_num = NULL; switch (format) { // Size of all EXIF integers is <= 32 bits, so they can be safely stored in // PyInt. All numeric types are signed by default and 'S' in type names // means 'signed'. // 8-bit case EXIF_FORMAT_BYTE: case EXIF_FORMAT_SBYTE: py_num = PyInt_FromLong(*ptr); break; // 16-bit case EXIF_FORMAT_SHORT: py_num = PyInt_FromLong(exif_get_short(ptr, bo)); break; case EXIF_FORMAT_SSHORT: py_num = PyInt_FromLong(exif_get_sshort(ptr, bo)); break; // 32-bit case EXIF_FORMAT_LONG: py_num = PyInt_FromLong(exif_get_long(ptr, bo)); break; case EXIF_FORMAT_SLONG: py_num = PyInt_FromLong(exif_get_slong(ptr, bo)); break; // rational numbers consist of two LONGs, they are represented as a PyTuple // with two PyInts // TODO: can code duplication be reduced here? case EXIF_FORMAT_RATIONAL: { ExifRational r = exif_get_rational(ptr, bo); PyObject* n = PyInt_FromLong(r.numerator); PyObject* d = PyInt_FromLong(r.denominator); py_num = PyTuple_Pack(2, n, d); Py_DECREF(n); Py_DECREF(d); } break; case EXIF_FORMAT_SRATIONAL: { ExifSRational r = exif_get_srational(ptr, bo); PyObject* n = PyInt_FromLong(r.numerator); PyObject* d = PyInt_FromLong(r.denominator); py_num = PyTuple_Pack(2, n, d); Py_DECREF(n); Py_DECREF(d); } break; } return py_num; } // Converts contents of an EXIF entry to a Python object. PyObject* mirage_exif_entry_to_py(ExifEntry* e, ExifByteOrder byte_order) { PyObject* py_value = NULL; switch (e->format) { case EXIF_FORMAT_BYTE: case EXIF_FORMAT_SBYTE: case EXIF_FORMAT_SHORT: case EXIF_FORMAT_SSHORT: case EXIF_FORMAT_LONG: case EXIF_FORMAT_SLONG: case EXIF_FORMAT_RATIONAL: case EXIF_FORMAT_SRATIONAL: // numeric values are stored in a PyList even if e->components == 1 to // reduce code complexity py_value = PyList_New(e->components); unsigned char format_size = exif_format_get_size(e->format); for (Py_ssize_t i = 0; i < e->components; i++) { const unsigned char* ptr = e->data + i*format_size; PyObject* py_num = mirage_exif_number_to_py(ptr, e->format, byte_order); // Note that PyList_SetItem does not increase refcount of added item // (py_num in this case), documentation says that it "steals" a // reference to it, so refcount shouldn't be decremented here. PyList_SetItem(py_value, i, py_num); } break; case EXIF_FORMAT_ASCII: // ASCII entries contain a null-terminated string of bytes with 7-bit // ASCII codes. 1 is subtracted here to chop off the last null ('\0') // byte. py_value = PyString_FromStringAndSize(e->data, e->size ? e->size - 1 : 0); break; default: // Content of an entry with an unknown format (including // EXIF_FORMAT_UNDEFINED) is simply converted to a PyByteArray. py_value = PyByteArray_FromStringAndSize(e->data, e->size); } return py_value; } // Converts all entries inside an IFD (image file directory) to a PyDict with // EXIF tag names as keys and entry values (see mirage_exif_entry_to_py) as // values. PyObject* mirage_exif_ifd_entries_to_py(ExifContent* c, ExifIfd ifd, ExifByteOrder byte_order) { PyObject* py_entries = PyDict_New(); for (Py_ssize_t i = 0; i < c->count; i++) { ExifEntry* entry = c->entries[i]; if (!entry) continue; const char* tag_name = exif_tag_get_name_in_ifd(entry->tag, ifd); PyObject* py_entry_value = mirage_exif_entry_to_py(entry, byte_order); PyDict_SetItemString(py_entries, tag_name, py_entry_value); Py_DECREF(py_entry_value); } return py_entries; } // Reads EXIF metadata from a file and returns a PyDict with IFD names ('EXIF', // '0', '1', 'GPS' etc, see EXIF specification) as keys and PyDicts of IFD // entries (see mirage_exif_ifd_entries_to_py) as values. PyObject* mirage_exif_read_metadata(PyObject* self, PyObject* args) { const char* filename; int filename_len; if (!PyArg_ParseTuple(args, "s#", &filename, &filename_len)) return NULL; ExifData* data = exif_data_new_from_file(filename); if (!data) Py_RETURN_NONE; ExifByteOrder byte_order = exif_data_get_byte_order(data); PyObject* py_dict = PyDict_New(); for (ExifIfd ifd = 0; ifd < EXIF_IFD_COUNT; ifd++) { ExifContent* content = data->ifd[ifd]; if (!content) continue; const char* ifd_name = exif_ifd_get_name(ifd); PyObject* py_entries = mirage_exif_ifd_entries_to_py(content, ifd, byte_order); PyDict_SetItemString(py_dict, ifd_name, py_entries); Py_DECREF(py_entries); } exif_data_unref(data); return py_dict; } PyMethodDef methods[] = { {"read_metadata", mirage_exif_read_metadata, METH_VARARGS}, {NULL, NULL, 0}, }; void initexif(void) { Py_InitModule("exif", methods); }