Merge pull request #29141 from danielfullmer/k2pdfopt

k2pdfopt: 2.32 -> 2.42
This commit is contained in:
Joachim F 2017-09-15 16:57:44 +00:00 committed by GitHub
commit 0504dd6fb0
4 changed files with 173 additions and 170 deletions

View File

@ -8,7 +8,18 @@ stdenv.mkDerivation rec {
sha256 = "1dgmcpapy7h68d53q2c5d0bpgzgfb2nw2blndnx9qhc7z12149mw";
};
buildFlags = [ "all" "libs" ];
installFlags = [ "libdir=/lib/" ]; # Specify libdir so Makefile will also install library.
preInstall = "mkdir -p $out/lib";
postInstall = ''
for i in pgm2asc.h gocr.h; do
install -D -m644 src/$i $out/include/gocr/$i
done
'';
preFixup = ''
sed -i -e 's|exec wish|exec ${tk}/bin/wish|' $out/bin/gocr.tcl
'';

View File

@ -1,105 +1,92 @@
# Build procedure lifted from https://aur.archlinux.org/packages/k2/k2pdfopt/PKGBUILD
{ stdenv, fetchzip, fetchurl, writeScript, libX11, libXext, autoconf, automake, libtool
, leptonica, libpng, libtiff, zlib, openjpeg, freetype, jbig2dec, djvulibre
, openssl }:
{ stdenv, fetchzip, fetchurl, fetchpatch, cmake, pkgconfig
, zlib, libpng
, enableGSL ? true, gsl
, enableGhostScript ? true, ghostscript
, enableMuPDF ? true, jbig2dec, openjpeg, freetype, harfbuzz, mupdf
, enableJPEG2K ? true, jasper
, enableDJVU ? true, djvulibre
, enableGOCR ? false, gocr # Disabled by default due to crashes
, enableTesseract ? true, leptonica, tesseract
}:
let
mupdf_src = fetchurl {
url = http://www.mupdf.com/downloads/archive/mupdf-1.6-source.tar.gz;
sha256 = "0qx51rj6alzcagcixm59rvdpm54w6syrwr4184v439jh14ryw4wq";
};
with stdenv.lib;
tess_src = fetchurl {
url = http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz;
sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96";
};
gocr_src = fetchurl {
url = http://www-e.uni-magdeburg.de/jschulen/ocr/gocr-0.49.tar.gz;
sha256 = "06hpzp7rkkwfr1fvmc8kcfz9v490i9yir7f7imh13gmka0fr6afc";
};
in stdenv.mkDerivation rec {
stdenv.mkDerivation rec {
name = "k2pdfopt-${version}";
version = "2.32";
version = "2.42";
src = fetchzip {
url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v${version}_src.zip";
sha256 = "1v3cj5bwpjvy7s66sfqcmkxs91f7nxaykjpdjm2wn87vn6q7n19m";
sha256 = "1zag4jmkr0qrcpqqb5davmvdrabhdyz87q4zz0xpfkl6xw2dn9bk";
};
buildInputs = [ libX11 libXext autoconf automake libtool leptonica libpng libtiff zlib
openjpeg freetype jbig2dec djvulibre openssl ];
NIX_LDFLAGS = "-lX11 -lXext";
patches = [ ./k2pdfopt.patch ];
hardeningDisable = [ "format" ];
nativeBuildInputs = [ cmake pkgconfig ];
k2_pa = ./k2pdfopt.patch;
tess_pa = ./tesseract.patch;
buildInputs =
let
mupdf_modded = mupdf.overrideAttrs (attrs: {
name = "mupdf-1.10a";
src = fetchurl {
url = "http://mupdf.com/downloads/archive/mupdf-1.10a-source.tar.gz";
sha256 = "0dm8wcs8i29aibzkqkrn8kcnk4q0kd1v66pg48h5c3qqp4v1zk5a";
};
# Excluded the pdf-*.c files, since they mostly just broke the #includes
prePatch = ''
cp ${src}/mupdf_mod/{font,stext-device,string}.c source/fitz/
cp ${src}/mupdf_mod/font-win32.c source/pdf/
'';
# Patches from previous 1.10a version in nixpkgs
patches = [
# Compatibility with new openjpeg
(fetchpatch {
name = "mupdf-1.9a-openjpeg-2.1.1.patch";
url = "https://git.archlinux.org/svntogit/community.git/plain/mupdf/trunk/0001-mupdf-openjpeg.patch?id=5a28ad0a8999a9234aa7848096041992cc988099";
sha256 = "1i24qr4xagyapx4bijjfksj4g3bxz8vs5c2mn61nkm29c63knp75";
})
builder = writeScript "builder.sh" ''
. ${stdenv}/setup
set -e
(fetchurl {
name = "CVE-2017-5896.patch";
url = "http://git.ghostscript.com/?p=mupdf.git;a=patch;h=2c4e5867ee699b1081527bc6c6ea0e99a35a5c27";
sha256 = "14k7x47ifx82sds1c06ibzbmcparfg80719jhgwjk6w1vkh4r693";
})
];
});
leptonica_modded = leptonica.overrideAttrs (attrs: {
prePatch = ''
cp ${src}/leptonica_mod/* src/
'';
});
tesseract_modded = tesseract.overrideAttrs (attrs: {
prePatch = ''
cp ${src}/tesseract_mod/{ambigs.cpp,ccutil.h,ccutil.cpp} ccutil/
cp ${src}/tesseract_mod/dawg.cpp api/
cp ${src}/tesseract_mod/{imagedata.cpp,tessdatamanager.cpp} ccstruct/
cp ${src}/tesseract_mod/openclwrapper.h opencl/
cp ${src}/tesseract_mod/{tessedit.cpp,thresholder.cpp} ccmain/
cp ${src}/tesseract_mod/tess_lang_mod_edge.h cube/
cp ${src}/tesseract_mod/tesscapi.cpp api/
cp ${src}/include_mod/{tesseract.h,leptonica.h} api/
'';
patches = [ ./tesseract.patch ];
});
in
[ zlib libpng ] ++
optional enableGSL gsl ++
optional enableGhostScript ghostscript ++
optionals enableMuPDF [ jbig2dec openjpeg freetype harfbuzz mupdf_modded ] ++
optionals enableJPEG2K [ jasper ] ++
optional enableDJVU djvulibre ++
optional enableGOCR gocr ++
optionals enableTesseract [ leptonica_modded tesseract_modded ];
plibs=`pwd`/patched_libraries
dontUseCmakeBuildDir = true;
tar zxf ${mupdf_src}
cp $src/mupdf_mod/font.c $src/mupdf_mod/string.c mupdf-1.6-source/source/fitz/
cp $src/mupdf_mod/pdf-* mupdf-1.6-source/source/pdf
cmakeFlags = [ "-DCMAKE_C_FLAGS=-I${src}/include_mod" ];
tar zxf ${tess_src}
cp $src/tesseract_mod/dawg.cpp tesseract-ocr/dict
cp $src/tesseract_mod/tessdatamanager.cpp tesseract-ocr/ccutil
cp $src/tesseract_mod/tessedit.cpp tesseract-ocr/ccmain
cp $src/tesseract_mod/tesscapi.cpp tesseract-ocr/api
cp $src/include_mod/tesseract.h $src/include_mod/leptonica.h tesseract-ocr/api
cp -a $src k2pdfopt_v2.21
chmod -R +w k2pdfopt_v2.21
patch -p0 -i $tess_pa
patch -p0 -i $k2_pa
cd tesseract-ocr
./autogen.sh
substituteInPlace "configure" \
--replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \
'LIBLEPT_HEADERSDIR=${leptonica}/include'
./configure --prefix=$plibs --disable-shared
make install
cd ..
tar zxf ${gocr_src}
cd gocr-0.49
./configure
cp src/{gocr.h,pnm.h,unicode.h,list.h} $plibs/include
cp include/config.h $plibs/include
make libs
cp src/libPgm2asc.a $plibs/lib
cd ../mupdf-1.6-source
make prefix=$plibs install
install -Dm644 build/debug/libmujs.a $plibs/lib
cd ../k2pdfopt_v2.21/k2pdfoptlib
gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include \
-I . -I ../willuslib
ar rcs libk2pdfopt.a *.o
cd ../willuslib
gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include
ar rcs libwillus.a *.o
cd ..
gcc -Wall -Ofast -o k2pdfopt.o -c k2pdfopt.c -I k2pdfoptlib/ -I willuslib/ \
-I include_mod/ -I $plibs/include
g++ -Ofast k2pdfopt.o -o k2pdfopt -I willuslib/ -I k2pdfoptlib/ -I include_mod/ \
-I $plibs/include -L $plibs/lib/ \
-L willuslib/ -L k2pdfoptlib/ -lk2pdfopt -lwillus -ldjvulibre -lz -lmupdf \
-ljbig2dec -ljpeg -lopenjp2 -lpng -lfreetype -lpthread -lmujs \
-lPgm2asc -llept -ltesseract -lcrypto
mkdir -p $out/bin
cp k2pdfopt $out/bin
installPhase = ''
install -D -m 755 k2pdfopt $out/bin/k2pdfopt
'';
meta = with stdenv.lib; {
@ -107,7 +94,7 @@ in stdenv.mkDerivation rec {
homepage = http://www.willus.com/k2pdfopt;
license = licenses.gpl3;
platforms = platforms.linux;
maintainers = [ maintainers.bosu ];
maintainers = with maintainers; [ bosu danielfullmer ];
};
}

View File

@ -1,95 +1,99 @@
diff -aur k2pdfopt_v2.21/willuslib/array.c k2pdfopt_v2.21.new/willuslib/array.c
--- k2pdfopt_v2.21/willuslib/array.c 2014-05-23 16:29:58.000000000 -0300
+++ k2pdfopt_v2.21.new/willuslib/array.c 2014-07-26 11:35:49.829825567 -0300
@@ -1055,7 +1055,7 @@
void arrayf_sort(float *a,int n)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4a2378b..502c477 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,6 +52,7 @@ endif(JPEG_FOUND)
include(FindJasper)
if(JASPER_FOUND)
set(HAVE_JASPER_LIB 1)
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY})
endif(JASPER_FOUND)
{
- sort(a,(long)n);
+ willus_sort(a,(long)n);
}
# paths from willuslib/wgs.c
@@ -66,8 +67,12 @@ else()
message(STATUS "Could NOT find ghostscript executable")
endif(GHOSTSCRIPT_EXECUTABLE)
-# willus.h
-# HAVE_GSL_LIB
+pkg_check_modules(GSL gsl)
+if(GSL_FOUND) # result variable of the pkg_check_modules(GSL ...) probe on the previous line
+ set(HAVE_GSL_LIB 1)
+ include_directories(SYSTEM ${GSL_INCLUDEDIR})
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS})
+endif(GSL_FOUND)
diff -aur k2pdfopt_v2.21/willuslib/math.c k2pdfopt_v2.21.new/willuslib/math.c
--- k2pdfopt_v2.21/willuslib/math.c 2013-08-15 21:33:50.000000000 -0300
+++ k2pdfopt_v2.21.new/willuslib/math.c 2014-07-26 11:36:02.853170659 -0300
@@ -532,7 +532,7 @@
# libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0)
@@ -80,7 +85,7 @@ if(MUPDF_FOUND)
include_directories(SYSTEM ${MUPDF_INCLUDEDIR})
message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}")
set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS}
- -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype
+ -lopenjp2 -ljbig2dec -ljpeg -lfreetype -lharfbuzz
)
endif(MUPDF_FOUND)
@@ -91,9 +96,25 @@ if(DJVU_FOUND)
set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${DJVU_LDFLAGS})
endif(DJVU_FOUND)
-# HAVE_GOCR_LIB
-# HAVE_LEPTONICA_LIB
-# HAVE_TESSERACT_LIB
+find_library(GOCR_LIB NAMES Pgm2asc)
+if(GOCR_LIB)
+ set(HAVE_GOCR_LIB 1)
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GOCR_LIB})
+endif(GOCR_LIB)
+
+pkg_check_modules(LEPTONICA lept)
+if(LEPTONICA_FOUND)
+ set(HAVE_LEPTONICA_LIB 1)
+ include_directories(SYSTEM ${LEPTONICA_INCLUDEDIR})
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${LEPTONICA_LDFLAGS})
+endif(LEPTONICA_FOUND)
+
+pkg_check_modules(TESSERACT tesseract)
+if(TESSERACT_FOUND)
+ set(HAVE_TESSERACT_LIB 1)
+ include_directories(SYSTEM ${TESSERACT_INCLUDEDIR})
+ set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${TESSERACT_LDFLAGS})
+endif(TESSERACT_FOUND)
-void sort(float *x,int n)
+void willus_sort(float *x,int n)
# ---- Describe project
{
int top,n1;
diff -aur k2pdfopt_v2.21/willuslib/ocrjocr.c k2pdfopt_v2.21.new/willuslib/ocrjocr.c
--- k2pdfopt_v2.21/willuslib/ocrjocr.c 2012-11-12 13:09:42.000000000 -0300
+++ k2pdfopt_v2.21.new/willuslib/ocrjocr.c 2014-07-26 11:36:46.699837185 -0300
diff --git a/willuslib/CMakeLists.txt b/willuslib/CMakeLists.txt
index 463bbc9..8043db5 100644
--- a/willuslib/CMakeLists.txt
+++ b/willuslib/CMakeLists.txt
@@ -6,7 +6,7 @@ include_directories(..)
set(WILLUSLIB_SRC
ansi.c array.c bmp.c bmpdjvu.c bmpmupdf.c dtcompress.c filelist.c
fontdata.c fontrender.c gslpolyfit.c linux.c math.c mem.c ocr.c
- ocrjocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c
+ ocrgocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c
token.c wfile.c wgs.c wgui.c willusversion.c win.c winbmp.c
wincomdlg.c winmbox.c winshell.c wmupdf.c wmupdfinfo.c wpdf.c wsys.c
wzfile.c wleptonica.c
diff --git a/willuslib/ocrgocr.c b/willuslib/ocrgocr.c
index 6027e9a..fbe10f0 100644
--- a/willuslib/ocrgocr.c
+++ b/willuslib/ocrgocr.c
@@ -29,6 +29,8 @@
#ifdef HAVE_GOCR_LIB
#include <gocr.h>
+job_t *JOB;
+job_t *OCR_JOB;
+
/*
** bmp8 must be grayscale
** (x1,y1) and (x2,y2) from top left of bitmap
@@ -66,6 +68,7 @@
@@ -63,6 +65,7 @@ void gocr_single_word_from_bmp8(char *text,int maxlen,WILLUSBITMAP *bmp8,
h=y2-y1+1;
dh=h+bw*2;
job=&_job;
+ JOB=job;
+ OCR_JOB=job;
job_init(job);
job_init_image(job);
// willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10);
diff -aur k2pdfopt_v2.21/willuslib/string.c k2pdfopt_v2.21.new/willuslib/string.c
--- k2pdfopt_v2.21/willuslib/string.c 2014-02-03 00:37:44.000000000 -0300
+++ k2pdfopt_v2.21.new/willuslib/string.c 2014-07-26 11:37:01.766506277 -0300
@@ -81,7 +81,7 @@
** Returns NULL if EOF, otherwise returns pointer to the string.
**
*/
-char *get_line(char *buf,int max,FILE *f)
+char *willus_get_line(char *buf,int max,FILE *f)
{
int i;
diff -aur k2pdfopt_v2.21/willuslib/willus.h k2pdfopt_v2.21.new/willuslib/willus.h
--- k2pdfopt_v2.21/willuslib/willus.h 2014-07-25 15:03:51.000000000 -0300
+++ k2pdfopt_v2.21.new/willuslib/willus.h 2014-07-26 11:37:56.316506038 -0300
@@ -214,9 +214,6 @@
** CMAKE handles the defines, not this source
** (Mod from Dirk Thierbach, 31-Dec-2013)
*/
-#ifdef USE_CMAKE
-#include "config.h"
-#else /* USE_CMAKE */
#ifndef HAVE_Z_LIB
#define HAVE_Z_LIB
@@ -268,7 +265,6 @@
#undef HAVE_GSL_LIB
#endif
-#endif /* USE_CMAKE */
/*
** Consistency check
*/
@@ -533,7 +529,7 @@
int *n,FILE *err);
int readxyz_ex (char *filename,double **x,double **y,double **z,
int *n,FILE *err,int ignore_after_semicolon);
-void sort (float *x,int n);
+void willus_sort (float *x,int n);
void sortd (double *x,int n);
void sorti (int *x,int n);
void sortxy (float *x,float *y,int n);
@@ -602,7 +598,7 @@
/* string.c */
void clean_line (char *buf);
void clean_line_end(char *buf);
-char *get_line (char *buf,int max,FILE *f);
+char *willus_get_line (char *buf,int max,FILE *f);
char *get_line_cf (char *buf,int max,FILE *f);
int mem_get_line_cf(char *buf,int maxlen,char *cptr,long *cindex,long csize);
int in_string (char *buffer,char *pattern);

View File

@ -1,12 +1,13 @@
diff -aur tesseract-ocr/api/Makefile.am tesseract-ocr.new/api/Makefile.am
--- tesseract-ocr/api/Makefile.am 2012-10-09 14:18:39.000000000 -0300
+++ tesseract-ocr.new/api/Makefile.am 2014-03-20 18:43:13.926030341 -0300
@@ -36,7 +36,7 @@
diff --git a/api/Makefile.am b/api/Makefile.am
index d8c1e54..46ead13 100644
--- a/api/Makefile.am
+++ b/api/Makefile.am
@@ -42,7 +42,7 @@ libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS)
if VISIBILITY
libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS
endif
-libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp
+libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp tesscapi.cpp
-libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp
+libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp tesscapi.cpp
lib_LTLIBRARIES += libtesseract.la
libtesseract_la_LDFLAGS =