tesseract
3.03
|
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "allheaders.h"
#include "boxchar.h"
#include "commandlineflags.h"
#include "degradeimage.h"
#include "errcode.h"
#include "fileio.h"
#include "normstrngs.h"
#include "stringrenderer.h"
#include "tlog.h"
#include "unicharset.h"
#include "util.h"
Go to the source code of this file.
Classes | |
struct | tesseract::SpacingProperties |
Namespaces | |
namespace | tesseract |
Functions | |
STRING_PARAM_FLAG (text,"","File name of text input to process") | |
STRING_PARAM_FLAG (outputbase,"","Basename for output image/box file") | |
BOOL_PARAM_FLAG (degrade_image, true,"Degrade rendered image with speckle noise, dilation/erosion ""and rotation") | |
INT_PARAM_FLAG (exposure, 0,"Exposure level in photocopier") | |
INT_PARAM_FLAG (resolution, 300,"Pixels per inch") | |
INT_PARAM_FLAG (xsize, 3600,"Width of output image") | |
INT_PARAM_FLAG (ysize, 4800,"Height of output image") | |
INT_PARAM_FLAG (margin, 100,"Margin round edges of image") | |
INT_PARAM_FLAG (ptsize, 12,"Size of printed text") | |
DOUBLE_PARAM_FLAG (char_spacing, 0,"Inter-character space in ems") | |
INT_PARAM_FLAG (leading, 12,"Inter-line space (in pixels)") | |
STRING_PARAM_FLAG (writing_mode,"horizontal","Specify one of the following writing"" modes.\n""'horizontal' : Render regular horizontal text. (default)\n""'vertical' : Render vertical text. Glyph orientation is"" selected by Pango.\n""'vertical-upright' : Render vertical text. Glyph "" orientation is set to be upright.") | |
INT_PARAM_FLAG (box_padding, 0,"Padding around produced bounding boxes") | |
BOOL_PARAM_FLAG (strip_unrenderable_words, false,"Remove unrenderable words from source text") | |
STRING_PARAM_FLAG (font,"Arial","Font description name to use") | |
BOOL_PARAM_FLAG (ligatures, false,"Rebuild and render ligatures") | |
BOOL_PARAM_FLAG (find_fonts, false,"Search for all fonts that can render the text") | |
BOOL_PARAM_FLAG (render_per_font, true,"If find_fonts==true, render each font to its own image. ""Image filenames are of the form output_name.font_name.tif") | |
BOOL_PARAM_FLAG (list_available_fonts, false,"List available fonts and quit.") | |
BOOL_PARAM_FLAG (render_ngrams, false,"Put each space-separated entity from the"" input file into one bounding box. The ngrams in the input"" file will be randomly permuted before rendering (so that"" there is sufficient variety of characters on each line).") | |
BOOL_PARAM_FLAG (output_word_boxes, false,"Output word bounding boxes instead of character boxes. ""This is used for Cube training, and implied by ""--render_ngrams.") | |
STRING_PARAM_FLAG (unicharset_file,"","File with characters in the unicharset. If --render_ngrams"" is true and --unicharset_file is specified, ngrams with"" characters that are not in unicharset will be omitted") | |
BOOL_PARAM_FLAG (bidirectional_rotation, false,"Rotate the generated characters both ways.") | |
BOOL_PARAM_FLAG (only_extract_font_properties, false,"Assumes that the input file contains a list of ngrams. Renders"" each ngram, extracts spacing properties and records them in"" output_base/[font_name].fontinfo file.") | |
BOOL_PARAM_FLAG (output_individual_glyph_images, false,"If true also outputs individual character images") | |
INT_PARAM_FLAG (glyph_resized_size, 0,"Each glyph is square with this side length in pixels") | |
INT_PARAM_FLAG (glyph_num_border_pixels_to_pad, 0,"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad") | |
void | tesseract::ExtractFontProperties (const string &utf8_text, StringRenderer *render, const string &output_base) |
bool | tesseract::MakeIndividualGlyphs (Pix *pix, const vector< BoxChar * > &vbox, const int input_tiff_page) |
int | main (int argc, char **argv) |
BOOL_PARAM_FLAG | ( | degrade_image | , |
true | , | ||
"Degrade rendered image with speckle noise, dilation/erosion ""and rotation" | |||
) |
BOOL_PARAM_FLAG | ( | strip_unrenderable_words | , |
false | , | ||
"Remove unrenderable words from source text" | |||
) |
BOOL_PARAM_FLAG | ( | ligatures | , |
false | , | ||
"Rebuild and render ligatures" | |||
) |
BOOL_PARAM_FLAG | ( | find_fonts | , |
false | , | ||
"Search for all fonts that can render the text" | |||
) |
BOOL_PARAM_FLAG | ( | render_per_font | , |
true | , | ||
"If find_fonts==true, render each font to its own image. ""Image filenames are of the form output_name.font_name.tif" | |||
) |
BOOL_PARAM_FLAG | ( | list_available_fonts | , |
false | , | ||
"List available fonts and quit." | |||
) |
BOOL_PARAM_FLAG | ( | render_ngrams | , |
false | , | ||
"Put each space-separated entity from the"" input file into one bounding box. The ngrams in the input"" file will be randomly permuted before rendering (so that"" there is sufficient variety of characters on each line)." | |||
) |
BOOL_PARAM_FLAG | ( | output_word_boxes | , |
false | , | ||
"Output word bounding boxes instead of character boxes. ""This is used for Cube training, and implied by ""--render_ngrams." | |||
) |
BOOL_PARAM_FLAG | ( | bidirectional_rotation | , |
false | , | ||
"Rotate the generated characters both ways." | |||
) |
BOOL_PARAM_FLAG | ( | only_extract_font_properties | , |
false | , | ||
"Assumes that the input file contains a list of ngrams. Renders"" each ngram, extracts spacing properties and records them in"" output_base/[font_name].fontinfo file." | |||
) |
BOOL_PARAM_FLAG | ( | output_individual_glyph_images | , |
false | , | ||
"If true also outputs individual character images" | |||
) |
DOUBLE_PARAM_FLAG | ( | char_spacing | , |
0 | , | ||
"Inter-character space in ems" | |||
) |
INT_PARAM_FLAG | ( | exposure | , |
0 | , | ||
"Exposure level in photocopier" | |||
) |
INT_PARAM_FLAG | ( | resolution | , |
300 | , | ||
"Pixels per inch" | |||
) |
INT_PARAM_FLAG | ( | xsize | , |
3600 | , | ||
"Width of output image" | |||
) |
INT_PARAM_FLAG | ( | ysize | , |
4800 | , | ||
"Height of output image" | |||
) |
INT_PARAM_FLAG | ( | margin | , |
100 | , | ||
"Margin round edges of image" | |||
) |
INT_PARAM_FLAG | ( | ptsize | , |
12 | , | ||
"Size of printed text" | |||
) |
INT_PARAM_FLAG | ( | leading | , |
12 | , | ||
"Inter-line space (in pixels)" | |||
) |
INT_PARAM_FLAG | ( | box_padding | , |
0 | , | ||
"Padding around produced bounding boxes" | |||
) |
INT_PARAM_FLAG | ( | glyph_resized_size | , |
0 | , | ||
"Each glyph is square with this side length in pixels" | |||
) |
INT_PARAM_FLAG | ( | glyph_num_border_pixels_to_pad | , |
0 | , | ||
"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad" | |||
) |
int main | ( | int | argc, |
char ** | argv | ||
) |
---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------
Definition at line 397 of file text2image.cpp.
{
  // Entry point of text2image.
  //
  // Parses the command-line flags, then does one of the following:
  //  * --list_available_fonts: prints every font reported by
  //    FontUtils::ListAvailableFonts() and exits.
  //  * --only_extract_font_properties: hands the (ngram-expanded) text to
  //    ExtractFontProperties() and exits.
  //  * otherwise: renders the UTF-8 text named by --text page by page,
  //    optionally degrades each page, thresholds it to binary, appends it to
  //    a TIFF file derived from --outputbase, and (unless --find_fonts is
  //    set) writes the accumulated boxes to <outputbase>.box.
  //
  // Returns EXIT_SUCCESS after listing fonts and 0 on the other successful
  // paths. Invalid flag combinations are reported via ASSERT_HOST_MSG /
  // TLOG_FATAL.
  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);

  if (FLAGS_list_available_fonts) {
    const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
    // Cache the size as int: the index is also printed with %d below.
    const int num_fonts = static_cast<int>(all_fonts.size());
    for (int i = 0; i < num_fonts; ++i) {
      tprintf("%3d: %s\n", i, all_fonts[i].c_str());
      ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
                      "Font %s is unrecognized.\n", all_fonts[i].c_str());
    }
    return EXIT_SUCCESS;
  }

  // Check validity of input flags.
  ASSERT_HOST_MSG(!FLAGS_text.empty(), "Text file missing!\n");
  ASSERT_HOST_MSG(!FLAGS_outputbase.empty(), "Output file missing!\n");
  ASSERT_HOST_MSG(FLAGS_render_ngrams || FLAGS_unicharset_file.empty(),
                  "Use --unicharset_file only if --render_ngrams is set.\n");
  ASSERT_HOST_MSG(FLAGS_find_fonts ||
                  FontUtils::IsAvailableFont(FLAGS_font.c_str()),
                  "Could not find font named %s\n", FLAGS_font.c_str());

  // Rendering ngrams implies word-level boxes.
  if (FLAGS_render_ngrams)
    FLAGS_output_word_boxes = true;

  // Font description handed to the renderer: "<font name> <point size>".
  char font_desc_name[1024];
  snprintf(font_desc_name, 1024, "%s %d", FLAGS_font.c_str(),
           static_cast<int>(FLAGS_ptsize));

  StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
  render.set_add_ligatures(FLAGS_ligatures);
  render.set_leading(FLAGS_leading);
  render.set_resolution(FLAGS_resolution);
  // --char_spacing is given in ems; scale it by the point size.
  render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
  render.set_h_margin(FLAGS_margin);
  render.set_v_margin(FLAGS_margin);
  render.set_output_word_boxes(FLAGS_output_word_boxes);
  render.set_box_padding(FLAGS_box_padding);
  render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);

  // Set text rendering orientation and their forms.
  if (FLAGS_writing_mode == "horizontal") {
    // Render regular horizontal text (default).
    render.set_vertical_text(false);
    render.set_gravity_hint_strong(false);
    render.set_render_fullwidth_latin(false);
  } else if (FLAGS_writing_mode == "vertical") {
    // Render vertical text. Glyph orientation is selected by Pango.
    render.set_vertical_text(true);
    render.set_gravity_hint_strong(false);
    render.set_render_fullwidth_latin(false);
  } else if (FLAGS_writing_mode == "vertical-upright") {
    // Render vertical text. Glyph orientation is set to be upright.
    // Also Basic Latin characters are converted to their fullwidth forms
    // on rendering, since fullwidth Latin characters are well designed to fit
    // vertical text lines, while .box files store halfwidth Basic Latin
    // unichars.
    render.set_vertical_text(true);
    render.set_gravity_hint_strong(true);
    render.set_render_fullwidth_latin(true);
  } else {
    TLOG_FATAL("Invalid writing mode : %s\n", FLAGS_writing_mode.c_str());
  }

  string src_utf8;
  // This c_str is NOT redundant!
  File::ReadFileToStringOrDie(FLAGS_text.c_str(), &src_utf8);

  // Remove the unicode mark (UTF-8 byte order mark) if present.
  if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
    src_utf8.erase(0, 3);
  }
  // length() is size_t; cast so the argument matches the %d conversion.
  tlog(1, "Render string of size %d\n", static_cast<int>(src_utf8.length()));

  if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
    // Try to preserve behavior of old text2image by expanding inter-word
    // spaces by a factor of 4.
    // NOTE(review): both arms of this conditional are a single space as this
    // text reached review, which contradicts the "factor of 4" remark above.
    // Whitespace inside the first literal may have been collapsed somewhere
    // along the way -- confirm against the authoritative source.
    const string kSeparator = FLAGS_render_ngrams ? " " : " ";
    // Also restrict the number of characters per line to try and avoid
    // line-breaking in the middle of words like "-A", "R$" etc. which are
    // otherwise allowed by the standard unicode line-breaking rules.
    const int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
    string rand_utf8;
    UNICHARSET unicharset;
    if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
        !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
      TLOG_FATAL("Failed to load unicharset from file %s\n",
                 FLAGS_unicharset_file.c_str());
    }

    // If we are rendering ngrams that will be OCRed later, shuffle them so that
    // tesseract does not have difficulties finding correct baseline, word
    // spaces, etc.
    const char *str8 = src_utf8.c_str();
    int len = src_utf8.length();
    int step;
    // (byte offset, byte length) of every whitespace-delimited token.
    vector<pair<int, int> > offsets;
    int offset = SpanUTF8Whitespace(str8);
    while (offset < len) {
      step = SpanUTF8NotWhitespace(str8 + offset);
      offsets.push_back(make_pair(offset, step));
      offset += step;
      offset += SpanUTF8Whitespace(str8 + offset);
    }
    if (FLAGS_render_ngrams)
      std::random_shuffle(offsets.begin(), offsets.end());

    const int num_ngrams = static_cast<int>(offsets.size());
    for (int i = 0, line = 1; i < num_ngrams; ++i) {
      const char *curr_pos = str8 + offsets[i].first;
      int ngram_len = offsets[i].second;
      // Skip words that contain characters not found in unicharset.
      if (!FLAGS_unicharset_file.empty() &&
          !unicharset.encodable_string(curr_pos, NULL)) {
        continue;
      }
      rand_utf8.append(curr_pos, ngram_len);
      // Start a new output line once the accumulated text exceeds
      // line * kCharsPerLine bytes.
      if (static_cast<int>(rand_utf8.length()) > line * kCharsPerLine) {
        rand_utf8.append(" \n");
        ++line;
        // Odd-numbered lines start with an extra separator.
        if (line & 0x1) rand_utf8.append(kSeparator);
      } else {
        rand_utf8.append(kSeparator);
      }
    }
    tlog(1, "Rendered ngram string of size %d\n",
         static_cast<int>(rand_utf8.length()));
    src_utf8.swap(rand_utf8);
  }

  if (FLAGS_only_extract_font_properties) {
    tprintf("Extracting font properties only\n");
    ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
    tprintf("Done!\n");
    return 0;
  }

  int im = 0;
  // Rotation applied to each image produced during pass 0, in order.
  vector<float> page_rotation;
  const char* to_render_utf8 = src_utf8.c_str();
  // The text does not change while rendering, so measure it once instead of
  // calling strlen() in the loop condition and again for every render call.
  const int text_len = static_cast<int>(strlen(to_render_utf8));

  // We use a two pass mechanism to rotate images in both direction.
  // The first pass(0) will rotate the images in random directions and
  // the second pass(1) will mirror those rotations.
  int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
  for (int pass = 0; pass < num_pass; ++pass) {
    // Number of images actually produced so far in this pass. This (and not
    // the iteration count) indexes page_rotation, because rotations are only
    // recorded for iterations that yield an image; an iteration with a NULL
    // pix would otherwise shift every later lookup and eventually read past
    // the end of the vector.
    int images_in_pass = 0;
    string font_used;
    for (int offset = 0; offset < text_len; ++im) {
      tlog(1, "Starting page %d\n", im);
      Pix* pix = NULL;
      const char* remaining_text = to_render_utf8 + offset;
      const int remaining_len = text_len - offset;
      if (FLAGS_find_fonts) {
        offset += render.RenderAllFontsToImage(remaining_text, remaining_len,
                                               &font_used, &pix);
      } else {
        offset += render.RenderToImage(remaining_text, remaining_len, &pix);
      }
      if (pix != NULL) {
        float rotation = 0;
        if (pass == 1 &&
            images_in_pass < static_cast<int>(page_rotation.size())) {
          // Pass 2, do mirror rotation.
          rotation = -1 * page_rotation[images_in_pass];
        }
        if (FLAGS_degrade_image) {
          pix = DegradeImage(pix, FLAGS_exposure, &rotation);
        }
        render.RotatePageBoxes(rotation);

        if (pass == 0) {
          // Pass 1, rotate randomly and store the rotation.
          page_rotation.push_back(rotation);
        }
        ++images_in_pass;

        // Convert to 8 bpp, then threshold at 128 to get the binary page.
        Pix* gray_pix = pixConvertTo8(pix, false);
        pixDestroy(&pix);
        Pix* binary = pixThresholdToBinary(gray_pix, 128);
        pixDestroy(&gray_pix);

        char tiff_name[1024];
        if (FLAGS_find_fonts && FLAGS_render_per_font) {
          // One TIFF per font: <outputbase>.<font_name_with_underscores>.tif
          string fontname_for_file = tesseract::StringReplace(
              font_used, " ", "_");
          snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
                   fontname_for_file.c_str());
          pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
        } else {
          // Single multi-page TIFF: create it on the first iteration, append
          // afterwards.
          snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
          pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
        }
        tprintf("Rendered page %d to file %s\n", im, tiff_name);

        // Make individual glyphs
        if (FLAGS_output_individual_glyph_images) {
          if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
            tprintf("ERROR: Individual glyphs not saved\n");
          }
        }
        pixDestroy(&binary);
      }
    }
  }

  if (!FLAGS_find_fonts) {
    string box_name = FLAGS_outputbase.c_str();
    box_name += ".box";
    render.WriteAllBoxes(box_name);
  }
  return 0;
}
STRING_PARAM_FLAG | ( | text | , |
"" | , | ||
"File name of text input to process" | |||
) |
STRING_PARAM_FLAG | ( | outputbase | , |
"" | , | ||
"Basename for output image/box file" | |||
) |
STRING_PARAM_FLAG | ( | writing_mode | , |
"horizontal" | , | ||
"Specify one of the following writing"" modes.\n""'horizontal' : Render regular horizontal text. (default)\n""'vertical' : Render vertical text. Glyph orientation is"" selected by Pango.\n""'vertical-upright' : Render vertical text. Glyph "" orientation is set to be upright." | |||
) |
STRING_PARAM_FLAG | ( | font | , |
"Arial" | , | ||
"Font description name to use" | |||
) |
STRING_PARAM_FLAG | ( | unicharset_file | , |
"" | , | ||
"File with characters in the unicharset. If --render_ngrams"" is true and --unicharset_file is specified, ngrams with"" characters that are not in unicharset will be omitted" | |||
) |