Skip to content

Commit

Permalink
Merge pull request #57 from kevincon/tesseract3.03
Browse files Browse the repository at this point in the history
Updated Tesseract to 3.03 and Leptonica to 1.70 + memory leak fix
  • Loading branch information
g8production committed Nov 13, 2014
2 parents 62e1af2 + 49ec849 commit 2f07010
Show file tree
Hide file tree
Showing 343 changed files with 41,190 additions and 2,122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

// This NSArray holds one NSDictionary per recognized character: @"text" is the recognized character (NSString) and @"box" is its NSValue encoded CGRect
// CGRects are in UIKit's coordinate space (origin is in the top left)
@property (nonatomic, readonly) NSDictionary *characterBoxes;
@property (nonatomic, readonly) NSArray *characterBoxes;

@property (nonatomic, readonly) NSArray *getConfidenceByWord;
@property (nonatomic, readonly) NSArray *getConfidenceBySymbol;
Expand Down
Binary file modified Products/TesseractOCR.framework/Versions/A/Resources/Info.plist
Binary file not shown.
Binary file modified Products/TesseractOCR.framework/Versions/A/TesseractOCR
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@
GCC_PRECOMPILE_PREFIX_HEADER = YES;
GCC_PREFIX_HEADER = "Template Framework Project/Template Framework Project-Prefix.pch";
INFOPLIST_FILE = "Template Framework Project/Template Framework Project-Info.plist";
ONLY_ACTIVE_ARCH = YES;
PRODUCT_NAME = "$(TARGET_NAME)";
WRAPPER_EXTENSION = app;
};
Expand All @@ -324,6 +325,7 @@
GCC_PRECOMPILE_PREFIX_HEADER = YES;
GCC_PREFIX_HEADER = "Template Framework Project/Template Framework Project-Prefix.pch";
INFOPLIST_FILE = "Template Framework Project/Template Framework Project-Info.plist";
ONLY_ACTIVE_ARCH = NO;
PRODUCT_NAME = "$(TARGET_NAME)";
WRAPPER_EXTENSION = app;
};
Expand Down
12 changes: 7 additions & 5 deletions Tesseract OCR iOS.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@
64F74CC1172FD75F0068E657 /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 0510;
LastUpgradeCheck = 0600;
ORGANIZATIONNAME = "Daniele Galiotto - www.g8production.com";
};
buildConfigurationList = 64F74CC4172FD75F0068E657 /* Build configuration list for PBXProject "Tesseract OCR iOS" */;
Expand Down Expand Up @@ -899,12 +899,12 @@
LINK_WITH_STANDARD_LIBRARIES = NO;
MACH_O_TYPE = mh_object;
MACOSX_DEPLOYMENT_TARGET = 10.8;
ONLY_ACTIVE_ARCH = NO;
ONLY_ACTIVE_ARCH = YES;
OTHER_LDFLAGS = "-ObjC";
PRODUCT_NAME = "$(TARGET_NAME)";
SDKROOT = iphoneos;
TARGETED_DEVICE_FAMILY = "1,2";
VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) x86_64 i386";
VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) i386 x86_64";
WRAPPER_EXTENSION = framework;
};
name = Debug;
Expand Down Expand Up @@ -933,11 +933,12 @@
LINK_WITH_STANDARD_LIBRARIES = NO;
MACH_O_TYPE = mh_object;
MACOSX_DEPLOYMENT_TARGET = 10.8;
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = "-ObjC";
PRODUCT_NAME = "$(TARGET_NAME)";
SDKROOT = iphoneos;
TARGETED_DEVICE_FAMILY = "1,2";
VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) x86_64 i386";
VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) i386 x86_64";
WRAPPER_EXTENSION = framework;
};
name = Release;
Expand All @@ -962,6 +963,7 @@
buildSettings = {
DEAD_CODE_STRIPPING = YES;
LINK_WITH_STANDARD_LIBRARIES = YES;
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = (
"-ObjC",
"-lstdc++",
Expand Down Expand Up @@ -996,7 +998,7 @@
GCC_WARN_UNINITIALIZED_AUTOS = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 5.0;
ONLY_ACTIVE_ARCH = NO;
ONLY_ACTIVE_ARCH = YES;
OTHER_LDFLAGS = (
"-ObjC",
"-lstdc++",
Expand Down
2 changes: 1 addition & 1 deletion TesseractOCR/Tesseract.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

// This NSArray holds one NSDictionary per recognized character: @"text" is the recognized character (NSString) and @"box" is its NSValue encoded CGRect
// CGRects are in UIKit's coordinate space (origin is in the top left)
@property (nonatomic, readonly) NSDictionary *characterBoxes;
@property (nonatomic, readonly) NSArray *characterBoxes;

@property (nonatomic, readonly) NSArray *getConfidenceByWord;
@property (nonatomic, readonly) NSArray *getConfidenceBySymbol;
Expand Down
45 changes: 31 additions & 14 deletions TesseractOCR/Tesseract.mm
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#import "environ.h"
#import "pix.h"
#import "ocrclass.h"
#import "allheaders.h"

namespace tesseract {
class TessBaseAPI;
Expand All @@ -23,7 +24,8 @@ @interface Tesseract () {
NSString* _language;
NSMutableDictionary* _variables;
tesseract::TessBaseAPI* _tesseract;
const UInt8 *_pixels;
//const UInt8 *_pixels;
Pix *currentPix;
ETEXT_DESC *_monitor;
}

Expand Down Expand Up @@ -69,6 +71,9 @@ - (void)dealloc {
delete _tesseract;
_tesseract = nullptr;
}
if (currentPix != nullptr) {
pixDestroy(&currentPix);
}
}

- (id)initWithDataPath:(NSString *)dataPath language:(NSString *)language {
Expand Down Expand Up @@ -204,7 +209,7 @@ - (void)setImage:(UIImage *)image {

CGImage *cgImage = image.CGImage;
CFDataRef data = CGDataProviderCopyData(CGImageGetDataProvider(cgImage));
_pixels = CFDataGetBytePtr(data);
const UInt8 *_pixels = CFDataGetBytePtr(data);

size_t bitsPerComponent = CGImageGetBitsPerComponent(cgImage);
size_t bitsPerPixel = CGImageGetBitsPerPixel(cgImage);
Expand All @@ -215,7 +220,11 @@ - (void)setImage:(UIImage *)image {
assert(bytesPerRow < MAX_INT32);
{
imageThresholder->SetImage(_pixels,width,height,(int)(bitsPerPixel/bitsPerComponent),(int)bytesPerRow);
_tesseract->SetImage(imageThresholder->GetPixRect());
if (currentPix != nullptr) {
pixDestroy(&currentPix);
}
currentPix = imageThresholder->GetPixRect();
_tesseract->SetImage(currentPix);
}

imageThresholder->Clear();
Expand All @@ -240,11 +249,15 @@ - (NSString *)recognizedText {
return text;
}

- (NSDictionary *)characterBoxes {
NSMutableDictionary *recognizedTextBoxes = [NSMutableDictionary dictionary];
- (NSArray *)characterBoxes {
NSMutableArray *recognizedTextBoxes = [[NSMutableArray alloc] init];

// Get box info
char* boxText = _tesseract->GetBoxText(0);
if (!boxText) {
NSLog(@"No boxes recognized. Check that -[Tesseract setImage:] is passed an image bigger than 0x0.");
return nil;
}
NSString *stringBoxes = [NSString stringWithUTF8String:boxText];
delete [] boxText;

Expand All @@ -262,7 +275,10 @@ - (NSDictionary *)characterBoxes {
CGFloat width = [boxComponents[3] floatValue] - [boxComponents[1] floatValue];
CGFloat height = [boxComponents[4] floatValue] - [boxComponents[2] floatValue];
CGRect box = CGRectMake(x, y, width, height);
[recognizedTextBoxes setObject:boxComponents[0] forKey:[NSValue valueWithCGRect:box]];
NSMutableDictionary *resultDict = [[NSMutableDictionary alloc] init];
resultDict[@"text"] = boxComponents[0];
resultDict[@"box"] = [NSValue valueWithCGRect:box];
[recognizedTextBoxes addObject: resultDict];
}
}
return recognizedTextBoxes;
Expand Down Expand Up @@ -294,14 +310,15 @@ - (NSArray *)getConfidences:(tesseract::PageIteratorLevel)level {
CGRect box = CGRectMake(x, y, width, height);

word = ri->GetUTF8Text(level);
conf = ri->Confidence(level);

[array addObject:@{
@"text": [NSString stringWithUTF8String:word],
@"confidence": [NSNumber numberWithFloat:conf],
@"boundingbox": [NSValue valueWithCGRect:box]
}];

if (word != NULL) {
conf = ri->Confidence(level);

[array addObject:@{
@"text": [NSString stringWithUTF8String:word],
@"confidence": [NSNumber numberWithFloat:conf],
@"boundingbox": [NSValue valueWithCGRect:box]
}];
}
delete[] word;
} while (ri->Next(level));
}
Expand Down
Loading

0 comments on commit 2f07010

Please sign in to comment.