Skip to content

Commit

Permalink
Add a Mimetype_Tester, some more mimetype stuff.
Browse files Browse the repository at this point in the history
  • Loading branch information
kohler committed Sep 7, 2023
1 parent c5807d3 commit 28aeff6
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 44 deletions.
2 changes: 1 addition & 1 deletion batch/updatedocmetadata.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ private function run_images() {
private function run_images_subset($docs) {
DocumentInfo::prefetch_content($docs, DocumentInfo::FLAG_NO_DOCSTORE);
foreach ($docs as $doc) {
$info = Mimetype::content_info($doc->content(), $doc->mimetype);
$info = Mimetype::content_info(null, $doc->mimetype, $doc);
$upd = [];
$m = $doc->metadata() ?? (object) [];
if (isset($info["width"]) && !isset($m->width)) {
Expand Down
50 changes: 41 additions & 9 deletions lib/isovideomimetype.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class ISOVideoMimetype implements JsonSerializable {
const F_FTYP = 1;
const F_MOOV = 2;
const F_MVEX = 4;
const F_ANALYZED = 8;

const TF_AUDIO = 1;
const TF_VIDEO = 2;
Expand All @@ -41,20 +42,21 @@ private function __construct() {
}

/** @param string $s
* @return VideoMimetype */
* @return ISOVideoMimetype */
static function make_string($s) {
$vm = new VideoMimetype;
$vm = new ISOVideoMimetype;
$vm->data = new ISOVideoFragment($s, 0, strlen($s));
$vm->bound = strlen($s);
return $vm;
}

/** @param string $filename
* @return VideoMimetype */
static function make_file($filename) {
$vm = new VideoMimetype;
* @param ?string $prefix
* @return ISOVideoMimetype */
static function make_file($filename, $prefix = null) {
$vm = new ISOVideoMimetype;
$vm->filename = $filename;
$s = file_get_contents($filename, false, null, 0, 32768);
$s = $prefix ?? file_get_contents($filename, false, null, 0, 32768);
$vm->data = new ISOVideoFragment($s, 0, strlen($s));
$vm->bound = @filesize($filename);
return $vm;
Expand Down Expand Up @@ -272,7 +274,12 @@ function walk_hdlr($data, $pos, $bound, $track) {
}


function walk_boxes() {
function analyze() {
if (($this->flags & self::F_ANALYZED) !== 0) {
return;
}
$this->flags |= self::F_ANALYZED;

$data = $this->data;
$pos = 0;
while (($this->flags & 3) !== 3
Expand Down Expand Up @@ -324,10 +331,10 @@ function walk_boxes() {
$this->tflags |= self::TF_ASPECT;
} else {
if ($this->width === null && $tr->width !== null) {
$this->width = $tr->width / 65536.0;
$this->width = (int) round($tr->width / 65536.0);
}
if ($this->height === null && $tr->height !== null) {
$this->height = $tr->height / 65536.0;
$this->height = (int) round($tr->height / 65536.0);
}
}
} else if ($tr->handler === 0x736f756e /* `soun` */) {
Expand All @@ -343,6 +350,31 @@ function walk_boxes() {
}
}

/** Summarize the analyzed media as an info array.
 *
 * Runs `analyze()` first. The result holds `type` and, when known,
 * `duration` and `width`/`height` (the dimensions are only reported
 * together).
 * @param ?string $type  MIME type to report; when null it is chosen from
 *                       the track flags
 * @return array */
function content_info($type = null) {
    $this->analyze();

    // No track flags were set by analysis: echo back the caller's type, if
    // a truthy one was given, and nothing else.
    if ($this->tflags === 0) {
        if ($type) {
            return ["type" => $type];
        }
        return [];
    }

    // Without an explicit type, a video flag wins over audio.
    $has_video = ($this->tflags & self::TF_VIDEO) !== 0;
    $result = ["type" => $type ?? ($has_video ? "video/mp4" : "audio/mp4")];

    if ($this->duration !== null) {
        $result["duration"] = $this->duration;
    }
    if ($this->width !== null && $this->height !== null) {
        $result["width"] = $this->width;
        $result["height"] = $this->height;
    }
    return $result;
}

#[\ReturnTypeWillChange]
function jsonSerialize() {
$j = ["flags" => $this->flags];
Expand Down
24 changes: 22 additions & 2 deletions lib/mimetype.php
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,19 @@ static function content_type($content, $type = null) {

/** @param ?string $content
* @param ?string $type
* @return array{type:string,width?:int,height?:int} */
static function content_info($content, $type = null) {
* @param ?DocumentInfo $doc
* @return ?array{type:string,width?:int,height?:int} */
static function content_info($content, $type = null, $doc = null) {
if ($content === null && $doc) {
if ($doc->has_memory_content()) {
$content = $doc->content();
} else {
$content = $doc->content_prefix(4096);
}
if ($content === false) {
return null;
}
}
$content = $content ?? "";
$type = self::content_type($content, $type);
if ($type === self::JPG_TYPE) {
Expand All @@ -319,6 +330,15 @@ static function content_info($content, $type = null) {
return self::png_content_info($content);
} else if ($type === self::GIF_TYPE) {
return self::gif_content_info($content);
} else if ($type === "video/mp4") {
if ($doc
&& strlen($content) !== $doc->size()
&& ($file = $doc->content_file())) {
$ivm = ISOVideoMimetype::make_file($file, $content);
} else {
$ivm = ISOVideoMimetype::make_string($content);
}
return $ivm->content_info();
} else {
return ["type" => $type];
}
Expand Down
30 changes: 19 additions & 11 deletions src/documentinfo.php
Original file line number Diff line number Diff line change
Expand Up @@ -311,17 +311,20 @@ static function sanitize_filename($fn) {
}

function analyze_content() {
$pfx = $this->content_prefix(4096);
if ($pfx === false) {
return;
}
$info = Mimetype::content_info($pfx, $this->mimetype);
$this->mimetype = $info["type"];
if (isset($info["width"])) {
$this->set_prop("width", $info["width"]);
}
if (isset($info["height"])) {
$this->set_prop("height", $info["height"]);
if (($info = Mimetype::content_info(null, $this->mimetype, $this))) {
$this->mimetype = $info["type"];
if (isset($info["width"])) {
$this->set_prop("width", $info["width"]);
}
if (isset($info["height"])) {
$this->set_prop("height", $info["height"]);
}
if (str_starts_with($this->mimetype, "video/")
|| str_starts_with($this->mimetype, "audio/")) {
if (isset($info["duration"])) {
$this->set_prop("npages", (int) ($info["duration"] * 10 + 0.5));
}
}
}
}

Expand Down Expand Up @@ -984,6 +987,11 @@ private function _temp_content_filename() {
return Filer::$tempdir . "/" . $base . Mimetype::extension($this->mimetype);
}

/** Report whether this document's `content` property is set (non-null).
 * @return bool */
function has_memory_content() {
    return !is_null($this->content);
}
/** @param int $prefix_len
* @return string|false */
function content_prefix($prefix_len = 4096) {
Expand Down
20 changes: 0 additions & 20 deletions test/t_documentbasics.php
Original file line number Diff line number Diff line change
Expand Up @@ -97,24 +97,4 @@ function test_docstore_path() {
xassert_eqq(Filer::docstore_path($doc), "/foo/bar/sha2-66a/sha2-66a04/sha2-66a045b452102c59d840ec097d59d9467e13a3f34f6494e539ffd32c1bb35f18");
xassert_eqq($doc->s3_key(), "doc/66a/sha2-66a045b452102c59d840ec097d59d9467e13a3f34f6494e539ffd32c1bb35f18.txt");
}

/** Check `Mimetype` content sniffing, type aliases, and extension lookup. */
function test_mimetype() {
// content starting with a `%PDF-` header is sniffed as PDF
xassert_eqq(Mimetype::content_type("%PDF-3.0\nwhatever\n"), Mimetype::PDF_TYPE);
// test that we can parse lib/mime.types for file extensions
xassert_eqq(Mimetype::extension("application/pdf"), ".pdf");
xassert_eqq(Mimetype::extension("image/gif"), ".gif");
// with no content, alias types map to their canonical type
xassert_eqq(Mimetype::content_type(null, "application/force"), "application/octet-stream");
xassert_eqq(Mimetype::content_type(null, "application/x-zip-compressed"), "application/zip");
xassert_eqq(Mimetype::content_type(null, "application/gz"), "application/gzip");
xassert_eqq(Mimetype::extension("application/g-zip"), ".gz");
xassert_eqq(Mimetype::type("application/download"), "application/octet-stream");
// `Mimetype::type` also accepts file extensions, including the `.sml` variant
xassert_eqq(Mimetype::extension("application/smil"), ".smil");
xassert_eqq(Mimetype::type(".smil"), "application/smil");
xassert_eqq(Mimetype::type(".sml"), "application/smil");
// `fileinfo` test
xassert_eqq(Mimetype::content_type("<html><head></head><body></body></html>"), "text/html");
// test that non-PDFs are not mistaken for PDFs
xassert_eqq(Mimetype::content_type("%PDF-3.0\nwhatever\n", Mimetype::PDF_TYPE), Mimetype::PDF_TYPE);
xassert_neqq(Mimetype::content_type("PDF-3.0\nwhatever\n", Mimetype::PDF_TYPE), Mimetype::PDF_TYPE);
}
}
51 changes: 51 additions & 0 deletions test/t_mimetype.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<?php
// t_mimetype.php -- HotCRP tests
// Copyright (c) 2006-2023 Eddie Kohler; see LICENSE.

/** Tests for MIME type detection and content inspection via `Mimetype`. */
class Mimetype_Tester {
    /** Check content sniffing, type aliases, and extension lookup. */
    function test_mimetype() {
        // content starting with a `%PDF-` header is sniffed as PDF
        xassert_eqq(Mimetype::content_type("%PDF-3.0\nwhatever\n"), Mimetype::PDF_TYPE);
        // test that we can parse lib/mime.types for file extensions
        xassert_eqq(Mimetype::extension("application/pdf"), ".pdf");
        xassert_eqq(Mimetype::extension("image/gif"), ".gif");
        // with no content, alias types map to their canonical type
        xassert_eqq(Mimetype::content_type(null, "application/force"), "application/octet-stream");
        xassert_eqq(Mimetype::content_type(null, "application/x-zip-compressed"), "application/zip");
        xassert_eqq(Mimetype::content_type(null, "application/gz"), "application/gzip");
        xassert_eqq(Mimetype::extension("application/g-zip"), ".gz");
        xassert_eqq(Mimetype::type("application/download"), "application/octet-stream");
        // `Mimetype::type` also accepts file extensions, including the `.sml` variant
        xassert_eqq(Mimetype::extension("application/smil"), ".smil");
        xassert_eqq(Mimetype::type(".smil"), "application/smil");
        xassert_eqq(Mimetype::type(".sml"), "application/smil");
        // `fileinfo` test
        xassert_eqq(Mimetype::content_type("<html><head></head><body></body></html>"), "text/html");
        // test that non-PDFs are not mistaken for PDFs
        xassert_eqq(Mimetype::content_type("%PDF-3.0\nwhatever\n", Mimetype::PDF_TYPE), Mimetype::PDF_TYPE);
        xassert_neqq(Mimetype::content_type("PDF-3.0\nwhatever\n", Mimetype::PDF_TYPE), Mimetype::PDF_TYPE);
    }

    /** Check that GIF content is detected and its dimensions are reported. */
    function test_gif() {
        // expected to be reported as a 1x1 image
        $spacer = base64_decode("R0lGODlhAQABAIAAAAAAAAAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==");
        xassert_eqq(Mimetype::content_type($spacer), Mimetype::GIF_TYPE);
        $ci = Mimetype::content_info($spacer);
        xassert_eqq($ci["type"], Mimetype::GIF_TYPE);
        xassert_eqq($ci["width"] ?? null, 1);
        xassert_eqq($ci["height"] ?? null, 1);

        // a short GIF prefix that is still expected to yield 85x85
        $icon = base64_decode("R0lGODlhVQBVAPQAAPcIWgBrMf/WtZSMSmMYKfelUggACPdSlK0AQu+MxvecUlJ7OfcQY/+1hPcYa0IAGOecWu+11oQAMfd7lLWUSiFzOecIUvc5c2uEQv+ErTl7OcYASvcha//OjPelYwAAACH5BAEAABEALAAAAABVAFUAQAU=");
        xassert_eqq(Mimetype::content_type($icon), Mimetype::GIF_TYPE);
        $ci = Mimetype::content_info($icon);
        xassert_eqq($ci["type"], Mimetype::GIF_TYPE);
        xassert_eqq($ci["width"] ?? null, 85);
        xassert_eqq($ci["height"] ?? null, 85);
    }

    /** Manual smoke check: log the content info computed for local MP4
     * samples.
     *
     * NOTE(review): presumably the `xxx_` prefix keeps the test runner from
     * picking this method up -- confirm. The sample paths are specific to
     * one developer machine, so files that are not readable are skipped
     * rather than handed to `ISOVideoMimetype::make_file`. */
    function xxx_test_mp4() {
        $samples = [
            "/Users/kohler/Downloads/sigcomm23-paper130-10_minute_presentation_video.mp4",
            "/Users/kohler/Downloads/sigcomm23-paper1037-10_minute_presentation_video/MoMA v1.0.3.mp4"
        ];
        foreach ($samples as $filename) {
            if (!is_readable($filename)) {
                continue;
            }
            $mt = ISOVideoMimetype::make_file($filename);
            $mt->analyze();
            error_log(json_encode($mt->content_info()));
        }
    }
}
3 changes: 2 additions & 1 deletion test/test02.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@
"Search_Tester",
"Settings_Tester",
"UpdateSchema_Tester",
"Batch_Tester"
"Batch_Tester",
"Mimetype_Tester"
);

0 comments on commit 28aeff6

Please sign in to comment.