ngx-scanner-text
v1.1.0
Published
This library is built to support optical character recognition (OCR) from images provided as urls. The core is based on Tesseract, supporting over 100 national languages worldwide.
Downloads
52
Maintainers
Keywords
Readme
ngx-scanner-text
This library is built to support optical character recognition (OCR) from images provided as urls.
The core is based on Tesseract, supporting over 100 national languages worldwide.
This demo codesandbox, github.
Installation :gear:
npm install ngx-scanner-text@<version> --save
Usage :syringe:
import { NgxScannerTextModule } from "ngx-scanner-text";
@NgModule({
imports: [NgxScannerTextModule],
})
export class AppModule {}
<ngx-scanner-text #scanner="scanner" [configs]="configs" (event)="onData($event)"></ngx-scanner-text>
<button (click)="onScanOCR(scanner)">Scan</button>
<textarea>{{ scanner?.logger$ | async | json }}</textarea>
<textarea>{{ text }}</textarea>
import { ChangeDetectorRef, Component } from "@angular/core";
import { NgxScannerTextComponent, Configs, Page } from "ngx-scanner-text";
@Component({
selector: "app-root",
templateUrl: "./app.component.html",
styleUrls: ["./app.component.scss"]
})
export class AppComponent {
public text: string;
public configs: Configs = {
src: 'https://raw.githubusercontent.com/id1945/ngx-scanner-text/master/ngx-scanner-text-origin.png',
languages: ['eng'],
color: 'red',
isAuto: true,
isImage: false,
options: {
rectangle: {
left: 70,
top: 100,
width: 700,
height: 200
}
}
};
constructor(private cdf: ChangeDetectorRef) {}
onData(data: Page) {
this.text = data.text;
this.cdf.detectChanges();
}
onScanOCR(scanner: NgxScannerTextComponent) {
scanner.scanOCR(this.configs).subscribe(console.log);
}
}
export interface Configs {
src: string;
color: string;
isAuto: boolean;
isImage: boolean;
languages: string[];
jobId?: string;
output?: Partial<OutputFormats>;
options?: Partial<RecognizeOptions>;
};
export interface Scheduler {
addWorker(worker: Worker): string;
addJob(
action: 'recognize',
...args: Parameters<Worker['recognize']>
): Promise<RecognizeResult>;
addJob(
action: 'detect',
...args: Parameters<Worker['detect']>
): Promise<DetectResult>;
terminate(): Promise<any>;
getQueueLen(): number;
getNumWorkers(): number;
}
export interface Worker {
load(jobId?: string): Promise<ConfigResult>;
writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>;
readText(path: string, jobId?: string): Promise<ConfigResult>;
removeText(path: string, jobId?: string): Promise<ConfigResult>;
FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>;
loadLanguage(langs?: string | Lang[], jobId?: string): Promise<ConfigResult>;
initialize(
langs?: string | Lang[],
oem?: OEM,
config?: string | Partial<InitOptions>,
jobId?: string
): Promise<ConfigResult>;
setParameters(
params: Partial<WorkerParams>,
jobId?: string
): Promise<ConfigResult>;
getImage(type: imageType): string;
recognize(
image: ImageLike,
options?: Partial<RecognizeOptions>,
output?: Partial<OutputFormats>,
jobId?: string
): Promise<RecognizeResult>;
detect(image: ImageLike, jobId?: string): Promise<DetectResult>;
terminate(jobId?: string): Promise<ConfigResult>;
getPDF(
title?: string,
textonly?: boolean,
jobId?: string
): Promise<GetPDFResult>;
}
export interface Lang {
code: string;
data: unknown;
}
export interface InitOptions {
load_system_dawg: string;
load_freq_dawg: string;
load_unambig_dawg: string;
load_punc_dawg: string;
load_number_dawg: string;
load_bigram_dawg: string;
}
export type LoggerMessage = {
jobId: string;
progress: number;
status: string;
userJobId: string;
workerId: string;
};
export interface WorkerOptions {
corePath: string;
langPath: string;
cachePath: string;
dataPath: string;
workerPath: string;
cacheMethod: string;
workerBlobURL: boolean;
gzip: boolean;
logger: (arg: LoggerMessage) => void;
errorHandler: (arg: any) => void;
}
export interface WorkerParams {
tessedit_ocr_engine_mode: OEM;
tessedit_pageseg_mode: PSM;
tessedit_char_whitelist: string;
preserve_interword_spaces: string;
user_defined_dpi: string;
tessjs_create_hocr: string;
tessjs_create_tsv: string;
tessjs_create_box: string;
tessjs_create_unlv: string;
tessjs_create_osd: string;
}
export interface OutputFormats {
text: boolean;
blocks: boolean;
layoutBlocks: boolean;
hocr: boolean;
tsv: boolean;
box: boolean;
unlv: boolean;
osd: boolean;
pdf: boolean;
imageColor: boolean;
imageGrey: boolean;
imageBinary: boolean;
debug: boolean;
}
export interface RecognizeOptions {
rectangle: Rectangle;
pdfTitle: string;
pdfTextOnly: boolean;
rotateAuto: boolean;
rotateRadians: number;
}
export interface ConfigResult {
jobId: string;
data: any;
}
export interface RecognizeResult {
jobId: string;
data: Page;
}
export interface GetPDFResult {
jobId: string;
data: number[];
}
export interface DetectResult {
jobId: string;
data: DetectData;
}
export interface DetectData {
tesseract_script_id: number | null;
script: string | null;
script_confidence: number | null;
orientation_degrees: number | null;
orientation_confidence: number | null;
}
export interface Rectangle {
left: number;
top: number;
width: number;
height: number;
}
export enum OEM {
TESSERACT_ONLY,
LSTM_ONLY,
TESSERACT_LSTM_COMBINED,
DEFAULT,
}
export enum PSM {
OSD_ONLY = '0',
AUTO_OSD = '1',
AUTO_ONLY = '2',
AUTO = '3',
SINGLE_COLUMN = '4',
SINGLE_BLOCK_VERT_TEXT = '5',
SINGLE_BLOCK = '6',
SINGLE_LINE = '7',
SINGLE_WORD = '8',
CIRCLE_WORD = '9',
SINGLE_CHAR = '10',
SPARSE_TEXT = '11',
SPARSE_TEXT_OSD = '12',
RAW_LINE = '13',
}
export const enum imageType {
COLOR = 0,
GREY = 1,
BINARY = 2,
}
export type ImageLike =
| string
| HTMLImageElement
| HTMLCanvasElement
| HTMLVideoElement
| CanvasRenderingContext2D
| File
| Blob
| ImageData
| any;
export interface Block {
paragraphs: Paragraph[];
text: string;
confidence: number;
baseline: Baseline;
bbox: Bbox;
blocktype: string;
polygon: any;
page: Page;
lines: Line[];
words: Word[];
symbols: Symbol[];
}
export interface Baseline {
x0: number;
y0: number;
x1: number;
y1: number;
has_baseline: boolean;
}
export interface Bbox {
x0: number;
y0: number;
x1: number;
y1: number;
}
export interface Line {
words: Word[];
text: string;
confidence: number;
baseline: Baseline;
bbox: Bbox;
paragraph: Paragraph;
block: Block;
page: Page;
symbols: Symbol[];
}
export interface Paragraph {
lines: Line[];
text: string;
confidence: number;
baseline: Baseline;
bbox: Bbox;
is_ltr: boolean;
block: Block;
page: Page;
words: Word[];
symbols: Symbol[];
}
export interface Symbol {
choices: Choice[];
image: any;
text: string;
confidence: number;
baseline: Baseline;
bbox: Bbox;
is_superscript: boolean;
is_subscript: boolean;
is_dropcap: boolean;
word: Word;
line: Line;
paragraph: Paragraph;
block: Block;
page: Page;
}
export interface Choice {
text: string;
confidence: number;
}
export interface Word {
symbols: Symbol[];
choices: Choice[];
text: string;
confidence: number;
baseline: Baseline;
bbox: Bbox;
is_numeric: boolean;
in_dictionary: boolean;
direction: string;
language: string;
is_bold: boolean;
is_italic: boolean;
is_underlined: boolean;
is_monospace: boolean;
is_serif: boolean;
is_smallcaps: boolean;
font_size: number;
font_id: number;
font_name: string;
line: Line;
paragraph: Paragraph;
block: Block;
page: Page;
}
export interface Page {
blocks: Block[] | null;
confidence: number;
lines: Line[];
oem: string;
osd: string;
paragraphs: Paragraph[];
psm: string;
symbols: Symbol[];
text: string;
version: string;
words: Word[];
hocr: string | null;
tsv: string | null;
box: string | null;
unlv: string | null;
sd: string | null;
imageColor: string | null;
imageGrey: string | null;
imageBinary: string | null;
rotateRadians: number | null;
pdf: number[] | null;
}
[
{
"name": "Afrikaans",
"code": "afr",
"dir": "ltr"
},
{
"name": "Albanian",
"code": "qi",
"dir": "ltr"
},
{
"name": "Amharic",
"code": "amh",
"dir": "rtl"
},
{
"name": "Arabic",
"code": "ara",
"dir": "rtl"
},
{
"name": "Armenian",
"code": "hye",
"dir": "ltr"
},
{
"name": "Azerbaijani",
"code": "aze",
"dir": "ltr"
},
{
"name": "Basque",
"code": "eus",
"dir": "ltr"
},
{
"name": "Belarusian",
"code": "bel",
"dir": "ltr"
},
{
"name": "Bengali",
"code": "ben",
"dir": "ltr"
},
{
"name": "Bosnian",
"code": "bos",
"dir": "ltr"
},
{
"name": "Bulgarian",
"code": "bul",
"dir": "ltr"
},
{
"name": "Catalan",
"code": "cat",
"dir": "ltr"
},
{
"name": "Cebuano",
"code": "ceb",
"dir": "ltr"
},
{
"name": "Cherokee",
"code": "chr",
"dir": "ltr"
},
{
"name": "Chinese (Simplified)",
"code": "chi_sim",
"dir": "ltr"
},
{
"name": "Chinese (Traditional)",
"code": "chi_tra",
"dir": "ltr"
},
{
"name": "Corsican",
"code": "cos",
"dir": "ltr"
},
{
"name": "Croatian",
"code": "hrv",
"dir": "ltr"
},
{
"name": "Czech",
"code": "ces",
"dir": "ltr"
},
{
"name": "Danish",
"code": "dan",
"dir": "ltr"
},
{
"name": "Dutch",
"code": "nld",
"dir": "ltr"
},
{
"name": "English",
"code": "eng",
"dir": "ltr"
},
{
"name": "Esperanto",
"code": "epo",
"dir": "ltr"
},
{
"name": "Estonian",
"code": "est",
"dir": "ltr"
},
{
"name": "Finnish",
"code": "fin",
"dir": "ltr"
},
{
"name": "French",
"code": "fra",
"dir": "ltr"
},
{
"name": "Frisian",
"code": "fry",
"dir": "ltr"
},
{
"name": "Galician",
"code": "glg",
"dir": "ltr"
},
{
"name": "Georgian",
"code": "kat",
"dir": "ltr"
},
{
"name": "German",
"code": "deu",
"dir": "ltr"
},
{
"name": "Greek",
"code": "ell",
"dir": "ltr"
},
{
"name": "Gujarati",
"code": "guj",
"dir": "ltr"
},
{
"name": "Haitian Creole",
"code": "hat",
"dir": "ltr"
},
{
"name": "Hausa",
"code": "hau",
"dir": "rtl"
},
{
"name": "Hebrew",
"code": "heb",
"dir": "rtl"
},
{
"name": "Hindi",
"code": "hin",
"dir": "ltr"
},
{
"name": "Hungarian",
"code": "hun",
"dir": "ltr"
},
{
"name": "Icelandic",
"code": "isl",
"dir": "ltr"
},
{
"name": "Igbo",
"code": "ibo",
"dir": "ltr"
},
{
"name": "Indonesian",
"code": "ind",
"dir": "ltr"
},
{
"name": "Irish",
"code": "gle",
"dir": "ltr"
},
{
"name": "Italian",
"code": "ita",
"dir": "ltr"
},
{
"name": "Japanese",
"code": "jpn",
"dir": "ltr"
},
{
"name": "Javanese",
"code": "jav",
"dir": "ltr"
},
{
"name": "Kannada",
"code": "kan",
"dir": "ltr"
},
{
"name": "Kazakh",
"code": "kaz",
"dir": "ltr"
},
{
"name": "Khmer",
"code": "khm",
"dir": "ltr"
},
{
"name": "Kinyarwanda",
"code": "kin",
"dir": "ltr"
},
{
"name": "Korean",
"code": "kor",
"dir": "ltr"
},
{
"name": "Kurdish (Kurmanji)",
"code": "kur_ara",
"dir": "rtl"
},
{
"name": "Kyrgyz",
"code": "kir",
"dir": "ltr"
},
{
"name": "Lao",
"code": "lao",
"dir": "ltr"
},
{
"name": "Latin",
"code": "lat",
"dir": "ltr"
},
{
"name": "Latvian",
"code": "lav",
"dir": "ltr"
},
{
"name": "Lithuanian",
"code": "lit",
"dir": "ltr"
},
{
"name": "Luxembourgish",
"code": "ltz",
"dir": "ltr"
},
{
"name": "Macedonian",
"code": "kd",
"dir": "ltr"
},
{
"name": "Malagasy",
"code": "lg",
"dir": "ltr"
},
{
"name": "Malay",
"code": "sa",
"dir": "ltr"
},
{
"name": "Malayalam",
"code": "al",
"dir": "ltr"
},
{
"name": "Maltese",
"code": "lt",
"dir": "ltr"
},
{
"name": "Maori",
"code": "i",
"dir": "ltr"
},
{
"name": "Marathi",
"code": "ar",
"dir": "ltr"
},
{
"name": "Mongolian",
"code": "on",
"dir": "ltr"
},
{
"name": "Myanmar (Burmese)",
"code": "ya",
"dir": "ltr"
},
{
"name": "Nepali",
"code": "nep",
"dir": "ltr"
},
{
"name": "Norwegian",
"code": "nor",
"dir": "ltr"
},
{
"name": "Odia (Oriya)",
"code": "ori",
"dir": "ltr"
},
{
"name": "Pashto",
"code": "pus",
"dir": "rtl"
},
{
"name": "Persian",
"code": "fas",
"dir": "rtl"
},
{
"name": "Polish",
"code": "pol",
"dir": "ltr"
},
{
"name": "Portuguese",
"code": "por",
"dir": "ltr"
},
{
"name": "Punjabi",
"code": "pan",
"dir": "ltr"
},
{
"name": "Romanian",
"code": "ron",
"dir": "ltr"
},
{
"name": "Russian",
"code": "rus",
"dir": "ltr"
},
{
"name": "Samoan",
"code": "mo",
"dir": "ltr"
},
{
"name": "Scots Gaelic",
"code": "gla",
"dir": "ltr"
},
{
"name": "Serbian",
"code": "rp",
"dir": "ltr"
},
{
"name": "Sesotho",
"code": "ot",
"dir": "ltr"
},
{
"name": "Shona",
"code": "na",
"dir": "ltr"
},
{
"name": "Sindhi",
"code": "d",
"dir": "rtl"
},
{
"name": "Sinhala (Sinhalese)",
"code": "in",
"dir": "in"
},
{
"name": "Slovak",
"code": "k",
"dir": "ltr"
},
{
"name": "Slovenian",
"code": "l",
"dir": "ltr"
},
{
"name": "Somali",
"code": "o",
"dir": "ltr"
},
{
"name": "Spanish",
"code": "pa",
"dir": "ltr"
},
{
"name": "Sundanese",
"code": "u",
"dir": "ltr"
},
{
"name": "Swahili",
"code": "wa",
"dir": "ltr"
},
{
"name": "Swedish",
"code": "we",
"dir": "ltr"
},
{
"name": "Tajik",
"code": "tg",
"dir": "ltr"
},
{
"name": "Tamil",
"code": "ta",
"dir": "ltr"
},
{
"name": "Tatar",
"code": "tt",
"dir": "ltr"
},
{
"name": "Telugu",
"code": "te",
"dir": "ltr"
},
{
"name": "Thai",
"code": "th",
"dir": "ltr"
},
{
"name": "Turkish",
"code": "tur",
"dir": "ltr"
},
{
"name": "Ukrainian",
"code": "ukr",
"dir": "ltr"
},
{
"name": "Urdu",
"code": "urd",
"dir": "rtl"
},
{
"name": "Uzbek",
"code": "uzb",
"dir": "ltr"
},
{
"name": "Vietnamese",
"code": "vie",
"dir": "ltr"
},
{
"name": "Welsh",
"code": "cym",
"dir": "ltr"
},
{
"name": "Xhosa",
"code": "xho",
"dir": "ltr"
},
{
"name": "Yiddish",
"code": "yi",
"dir": "rtl"
},
{
"name": "Yoruba",
"code": "yo",
"dir": "ltr"
},
{
"name": "Zulu",
"code": "zu",
"dir": "ltr"
}
]
Note that the `dir` property indicates the direction of the language,
where `ltr` means left-to-right and `rtl` means right-to-left.
API Documentation :rescue_worker_helmet:
Input :electric_plug:
| Field | Description | Type | Default | | --- | --- | --- | --- | | [configs] | config | Configs | {} |
Ouput :electric_plug:
| Field | Description | Type | Default | | --- | --- | --- | --- | | (event) | result data | BehaviorSubject | {} |
Component exports :electric_plug:
| Field | Description | Type | Default | | --- | --- | --- | --- | | data$ | result data | BehaviorSubject | {} | | logger$ | status | BehaviorSubject | {} | | image$ | actual photo size | BehaviorSubject | {width: 0, height: 0} |
Support versions
Author Information
If you want donate for me! :moneybag:
MIT License Copyright (c) 2022 DaiDH