diff --git a/app/TesseractOCRModel.defaults.yml b/app/TesseractOCRModel.defaults.yml index 1d644fbaae0e2cc2fd45fc93a2e4822a10b86da1..a6f703eb581aa0e629e2ef5df06c6fe8b52e7df7 100644 --- a/app/TesseractOCRModel.defaults.yml +++ b/app/TesseractOCRModel.defaults.yml @@ -2,6 +2,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/afr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -15,6 +17,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -28,6 +32,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -41,6 +47,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -54,6 +62,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -67,6 +77,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -80,6 +92,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -93,6 +107,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -106,6 +122,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -119,6 +137,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -132,6 +152,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -145,6 +167,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -158,6 +182,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -171,6 +197,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -184,6 +212,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -197,6 +227,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -210,6 +242,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -223,6 +257,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -236,6 +272,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -249,6 +287,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -262,6 +302,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -275,6 +317,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -288,6 +332,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -301,6 +347,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -314,6 +362,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -327,6 +377,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -340,6 +392,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -353,6 +407,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -366,6 +422,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -379,6 +437,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -392,6 +452,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -405,6 +467,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -418,6 +482,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -431,6 +497,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -444,6 +512,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -457,6 +527,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -470,6 +542,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -483,6 +557,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -496,6 +572,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -509,6 +587,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -522,6 +602,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -535,6 +617,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -548,6 +632,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -561,6 +647,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -574,6 +662,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -587,6 +677,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -600,6 +692,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -613,6 +707,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -626,6 +722,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -639,6 +737,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -652,6 +752,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -665,6 +767,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -678,6 +782,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -691,6 +797,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -704,6 +812,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -717,6 +827,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -730,6 +842,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -743,6 +857,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -756,6 +872,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -769,6 +887,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -782,6 +902,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -795,6 +917,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -808,6 +932,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -821,6 +947,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -834,6 +962,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -847,6 +977,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -860,6 +992,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -873,6 +1007,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -886,6 +1022,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -899,6 +1037,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -912,6 +1052,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -925,6 +1067,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -938,6 +1082,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -951,6 +1097,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -964,6 +1112,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -977,6 +1127,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -990,6 +1142,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1003,6 +1157,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1016,6 +1172,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1029,6 +1187,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1042,6 +1202,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -1055,6 +1217,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -1068,6 +1232,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1081,6 +1247,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1094,6 +1262,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1107,6 +1277,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1120,6 +1292,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1133,6 +1307,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1146,6 +1322,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1159,6 +1337,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1172,6 +1352,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1185,6 +1367,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1198,6 +1382,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1211,6 +1397,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1224,6 +1412,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1237,6 +1427,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1250,6 +1442,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1263,6 +1457,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1276,6 +1472,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1289,6 +1487,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1302,6 +1502,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1315,6 +1517,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: diff --git a/app/TranskribusHTRModel.defaults.yml b/app/TranskribusHTRModel.defaults.yml deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/app/__init__.py b/app/__init__.py index 9db97d3f2558fdc8e5f7f6347d568ccbd94810f5..304f937d5b3ecd481b626a0906c7fe39cbdad54b 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -29,9 +29,9 @@ def create_app(config: Config = Config) -> Flask: ''' Creates an initialized Flask (WSGI Application) object. ''' app: Flask = Flask(__name__) app.config.from_object(config) + config.init_app(app) assets.init_app(app) - config.init_app(app) db.init_app(app) hashids.init_app(app) login.init_app(app) diff --git a/app/cli.py b/app/cli.py index 4bff77d3c89da2bd35f875831fdd930caf973849..64cf4fb7a12a29c869a82142ebfb8b8d3dc63145 100644 --- a/app/cli.py +++ b/app/cli.py @@ -1,7 +1,7 @@ from flask import current_app from flask_migrate import upgrade from . import db -from .models import Corpus, Job, Role, User, TesseractOCRModel +from .models import Corpus, Role, User, TesseractOCRModel, TranskribusHTRModel import click import os @@ -36,8 +36,10 @@ def register(app): Role.insert_defaults() current_app.logger.info('Insert/Update default users') User.insert_defaults() - current_app.logger.info('Insert/Update default tesseract ocr models') + current_app.logger.info('Insert/Update default TesseractOCRModels') TesseractOCRModel.insert_defaults() + current_app.logger.info('Insert/Update default TranskribusHTRModels') + TranskribusHTRModel.insert_defaults() @app.cli.group() def daemon(): diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index e56bafbc4e6b9ca03f9278649d63c019c5a0ce88..d65fc3be6bf02027805052127a9d3220354bb990 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -1,5 +1,11 @@ from app import db -from app.models import Job, JobResult, JobStatus, TesseractOCRModel +from app.models import ( + Job, + JobResult, + JobStatus, + TesseractOCRModel, + TranskribusHTRModel +) from datetime import datetime from flask import current_app from werkzeug.utils import secure_filename @@ -56,7 +62,8 @@ class CheckJobsMixin: if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' elif job.service == 'transkribus-htr-pipeline': - command += f' -m {job.service_args["model"]}' + transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model']) + command += f' -m {transkribus_htr_model.transkribus_model_id}' readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME') command += f' --readcoop-username "{readcoop_username}"' readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD') diff --git a/app/models.py b/app/models.py index 1a857dd4b27edf3377bb50b878afa0c43a636a07..3c4bd94f54252472fa081219572a4d2b9a9c6080 100644 --- a/app/models.py +++ b/app/models.py @@ -18,6 +18,10 @@ import xml.etree.ElementTree as ET import yaml +TRANSKRIBUS_HTR_MODELS = \ + json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text').content)['trpModelMetadata'] # noqa + + class IntEnumColumn(db.TypeDecorator): impl = db.Integer @@ -187,6 +191,12 @@ class User(HashidMixin, UserMixin, db.Model): cascade='all, delete-orphan', lazy='dynamic' ) + transkribus_htr_models = db.relationship( + 'TranskribusHTRModel', + backref='user', + cascade='all, delete-orphan', + lazy='dynamic' + ) corpora = db.relationship( 'Corpus', backref='user', @@ -362,6 +372,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): compatible_service_versions = db.Column(ContainerColumn(list, 255)) description = db.Column(db.String(255)) publisher = db.Column(db.String(128)) + publisher_url = db.Column(db.String(512)) + publishing_url = db.Column(db.String(512)) publishing_year = db.Column(db.Integer) shared = db.Column(db.Boolean, default=False) title = db.Column(db.String(64)) @@ -383,7 +395,10 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): 'compatible_service_versions': self.compatible_service_versions, 'description': self.description, 'publisher': self.publisher, + 'publisher_url': self.publisher_url, + 'publishing_url': self.publishing_url, 'publishing_year': self.publishing_year, + 'shared': self.shared, 'title': self.title, **self.file_mixin_to_dict() } @@ -409,7 +424,10 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): model.compatible_service_versions = m['compatible_service_versions'] model.description = m['description'] model.publisher = m['publisher'] + model.publisher_url = m['publisher_url'] + model.publishing_url = m['publishing_url'] model.publishing_year = m['publishing_year'] + model.shared = True model.title = m['title'] model.version = m['version'] continue @@ -417,6 +435,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): compatible_service_versions=m['compatible_service_versions'], description=m['description'], publisher=m['publisher'], + publisher_url=m['publisher_url'], + publishing_url=m['publishing_url'], publishing_year=m['publishing_year'], shared=True, title=m['title'], @@ -445,6 +465,58 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): db.session.commit() +class TranskribusHTRModel(HashidMixin, db.Model): + __tablename__ = 'transkribus_htr_models' + # Primary key + id = db.Column(db.Integer, primary_key=True) + # Foreign keys + user_id = db.Column(db.Integer, db.ForeignKey('users.id')) + # Fields + shared = db.Column(db.Boolean, default=False) + transkribus_model_id = db.Column(db.Integer) + transkribus_name = db.Column(db.String(64)) + # Backrefs: user: User + + def to_dict(self, backrefs=False, relationships=False): + dict_tesseract_ocr_model = { + 'id': self.hashid, + 'user_id': self.user.hashid, + 'shared': self.shared, + 'transkribus_model_id': self.transkribus_model_id, + 'transkribus_name': self.transkribus_name + } + if backrefs: + dict_tesseract_ocr_model['user'] = \ + self.user.to_dict(backrefs=True, relationships=False) + if relationships: + pass + return dict_tesseract_ocr_model + + @staticmethod + def insert_defaults(): + user = User.query.filter_by(username='nopaque').first() + models = [ + m for m in TRANSKRIBUS_HTR_MODELS if True + and 'creator' in m and m['creator'] == 'Transkribus Team' + and 'docType' in m and m['docType'] == 'handwritten' + ] + for m in models: + model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa + if model is not None: + model.shared = True + model.transkribus_model_id = m['modelId'] + model.transkribus_name = m['name'] + continue + model = TranskribusHTRModel( + shared=True, + transkribus_name=m['name'], + transkribus_model_id=m['modelId'], + user=user, + ) + db.session.add(model) + db.session.commit() + + class JobInput(FileMixin, HashidMixin, db.Model): __tablename__ = 'job_inputs' # Primary key diff --git a/app/services/__init__.py b/app/services/__init__.py index e41a895df47464871c47a5c2e2d80e872485d17e..73c78b5994129da482b94bfce647ea504a034cff 100644 --- a/app/services/__init__.py +++ b/app/services/__init__.py @@ -3,11 +3,10 @@ import os import yaml -services_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), 'services.yml') +services_file = \ + os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml') with open(services_file, 'r') as f: SERVICES = yaml.safe_load(f) - bp = Blueprint('services', __name__) from . import routes # noqa diff --git a/app/services/forms.py b/app/services/forms.py index 9d7bf45db8c3a978e0dea4996851132651f2705e..c35621db05d34a73f290deadc9422e55f429b97e 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -1,4 +1,4 @@ -from app.models import TesseractOCRModel +from app.models import TesseractOCRModel, TranskribusHTRModel from flask_login import current_user from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired @@ -115,11 +115,10 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm): if 'binarization' in service_info['methods']: if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] + models = TranskribusHTRModel.query.filter_by(shared=True).all() + models += TranskribusHTRModel.query.filter_by(shared=False, user=current_user).all() self.model.choices = [('', 'Choose your option')] - self.model.choices += [ - ('37569', 'Tim Model'), - ('29539', 'UCL–University of Toronto #7') - ] + self.model.choices += [(x.hashid, x.transkribus_name) for x in models] self.model.default = '' self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.data = version diff --git a/app/services/routes.py b/app/services/routes.py index feecf39ad6c91097d7b5eae9d230339253f6e522..638ff1cf28356b57fc76d9cd8edbba153bc4f2dc 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -1,5 +1,12 @@ from app import db, hashids -from app.models import Job, JobInput, JobStatus +from app.models import ( + Job, + JobInput, + JobStatus, + TesseractOCRModel, + TRANSKRIBUS_HTR_MODELS, + TranskribusHTRModel +) from flask import ( abort, current_app, @@ -74,7 +81,7 @@ def file_setup_pipeline(): flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa return render_template( - f'services/{service.replace("-", "_")}.html.j2', + 'services/file_setup_pipeline.html.j2', form=form, title=service_manifest['name'] ) @@ -133,9 +140,11 @@ def tesseract_ocr_pipeline(): db.session.commit() flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa + tesseract_ocr_models = TesseractOCRModel.query.all() return render_template( - f'services/{service.replace("-", "_")}.html.j2', + 'services/tesseract_ocr_pipeline.html.j2', form=form, + tesseract_ocr_models=tesseract_ocr_models, title=service_manifest['name'] ) @@ -155,7 +164,7 @@ def transkribus_htr_pipeline(): if not form.validate(): return make_response(form.errors, 400) service_args = {} - service_args['model'] = form.model.data + service_args['model'] = hashids.decode(form.model.data) if form.binarization.data: service_args['binarization'] = True job = Job( @@ -195,10 +204,14 @@ def transkribus_htr_pipeline(): db.session.commit() flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa + transkribus_htr_models = TranskribusHTRModel.query.filter_by(shared=True).all() + transkribus_htr_models += TranskribusHTRModel.query.filter_by(shared=False, user=current_user).all() return render_template( - f'services/{service.replace("-", "_")}.html.j2', + f'services/transkribus_htr_pipeline.html.j2', form=form, - title=service_manifest['name'] + title=service_manifest['name'], + TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS, + transkribus_htr_models=transkribus_htr_models ) @@ -256,7 +269,7 @@ def spacy_nlp_pipeline(): flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa return render_template( - f'services/{service.replace("-", "_")}.html.j2', + 'services/spacy_nlp_pipeline.html.j2', form=form, title=service_manifest['name'] ) diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index 723e775869f2dc6b431dd7cd0ef847c0a702767a..129b74aa93da9a91bff7acbc9337078f5808985e 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -53,7 +53,17 @@ {{ wtf.render_field(form.pdf, accept='application/pdf', placeholder='Choose a PDF file') }} </div> <div class="col s12 l4"> - {{ wtf.render_field(form.model, material_icon='language') }} + <div class="input-field"> + <i class="material-icons prefix">language</i> + {{ form.model() }} + {{ form.model.label }} + <span class="helper-text"> + <a class="modal-trigger" href="#models-modal">More details about models</a> + </span> + {% for error in form.model.errors %} + <span class="helper-text error-color-text">{{ error }}</span> + {% endfor %} + </div> </div> <div class="col s12 l3"> {{ wtf.render_field(form.version, material_icon='apps') }} @@ -138,6 +148,37 @@ {% block modals %} {{ super() }} +<div id="models-modal" class="modal"> + <div class="modal-content"> + <h4>Tesseract OCR Pipeline models</h4> + <table> + <thead> + <tr> + <th>Title</th> + <th>Description</th> + <th>Biblio</th> + </tr> + </thead> + <tbody> + {% for m in tesseract_ocr_models %} + <tr id="tesseract-ocr-model-{{ m.hashid }}"> + <td>{{ m.title }}</td> + {% if m.description == '' %} + <td>Description is not available.</td> + {% else %} + <td>{{ m.description }}</td> + {% endif %} + <td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td> + </tr> + {% endfor %} + </tbody> + </table> + </div> + <div class="modal-footer"> + <a href="#!" class="modal-close waves-effect waves-light btn">Close</a> + </div> +</div> + <div id="progress-modal" class="modal"> <div class="modal-content"> <h4><i class="material-icons left">file_upload</i>Uploading files...</h4> diff --git a/app/templates/services/transkribus_htr_pipeline.html.j2 b/app/templates/services/transkribus_htr_pipeline.html.j2 index 931c85dc896e7164abf4d585b70ed0112f187e9d..7708e8d8636ed7223c0b17730a1fc8c38d51f7af 100644 --- a/app/templates/services/transkribus_htr_pipeline.html.j2 +++ b/app/templates/services/transkribus_htr_pipeline.html.j2 @@ -58,7 +58,17 @@ {{ wtf.render_field(form.pdf, accept='application/pdf', placeholder='Choose a PDF file') }} </div> <div class="col s12 l4"> - {{ wtf.render_field(form.model, material_icon='language') }} + <div class="input-field"> + <i class="material-icons prefix">language</i> + {{ form.model() }} + {{ form.model.label }} + <span class="helper-text"> + <a class="modal-trigger" href="#models-modal">More details about models</a> + </span> + {% for error in form.model.errors %} + <span class="helper-text error-color-text">{{ error }}</span> + {% endfor %} + </div> </div> <div class="col s12 l3"> {{ wtf.render_field(form.version, material_icon='apps') }} @@ -143,6 +153,29 @@ {% block modals %} {{ super() }} +<div id="models-modal" class="modal"> + <div class="modal-content"> + <h4>Transkribus HTR Pipeline models</h4> + <ul class="collapsible popout" id="transkribus-htr-models"> + {% for m in transkribus_htr_models %} + <li id="transkribus-htr-model-{{ m.hashid }}"> + <div class="collapsible-header"><i class="material-icons">widgets</i>{{ m.transkribus_name }}</div> + <div class="collapsible-body"> + {% for m_info in TRANSKRIBUS_HTR_MODELS %} + {% if m_info['modelId'] == m.transkribus_model_id %} + {{ m_info|tojson }} + {% endif %} + {% endfor %} + </div> + </li> + {% endfor %} + </ul> + </div> + <div class="modal-footer"> + <a href="#!" class="modal-close waves-effect waves-light btn">Close</a> + </div> +</div> + <div id="progress-modal" class="modal"> <div class="modal-content"> <h4><i class="material-icons left">file_upload</i>Uploading files...</h4> diff --git a/migrations/versions/aa855b80cf1d_.py b/migrations/versions/9e8d7d15d950_.py similarity index 90% rename from migrations/versions/aa855b80cf1d_.py rename to migrations/versions/9e8d7d15d950_.py index 687c89a8ddc9214931d07e9986bebe333f6250e9..b76a490ea3ec0aedf252f49b78976a5cac4e263d 100644 --- a/migrations/versions/aa855b80cf1d_.py +++ b/migrations/versions/9e8d7d15d950_.py @@ -1,8 +1,8 @@ """empty message -Revision ID: aa855b80cf1d +Revision ID: 9e8d7d15d950 Revises: -Create Date: 2022-04-01 12:14:42.606685 +Create Date: 2022-04-22 09:38:49.527498 """ from alembic import op @@ -10,7 +10,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -revision = 'aa855b80cf1d' +revision = '9e8d7d15d950' down_revision = None branch_labels = None depends_on = None @@ -83,6 +83,8 @@ def upgrade(): sa.Column('compatible_service_versions', sa.String(length=255), nullable=True), sa.Column('description', sa.String(length=255), nullable=True), sa.Column('publisher', sa.String(length=128), nullable=True), + sa.Column('publisher_url', sa.String(length=512), nullable=True), + sa.Column('publishing_url', sa.String(length=512), nullable=True), sa.Column('publishing_year', sa.Integer(), nullable=True), sa.Column('shared', sa.Boolean(), nullable=True), sa.Column('title', sa.String(length=64), nullable=True), @@ -90,6 +92,15 @@ def upgrade(): sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), sa.PrimaryKeyConstraint('id') ) + op.create_table('transkribus_htr_models', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=True), + sa.Column('shared', sa.Boolean(), nullable=True), + sa.Column('transkribus_model_id', sa.Integer(), nullable=True), + sa.Column('transkribus_name', sa.String(length=64), nullable=True), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), + sa.PrimaryKeyConstraint('id') + ) op.create_table('corpus_files', sa.Column('creation_date', sa.DateTime(), nullable=True), sa.Column('filename', sa.String(length=255), nullable=True), @@ -141,6 +152,7 @@ def downgrade(): op.drop_table('job_results') op.drop_table('job_inputs') op.drop_table('corpus_files') + op.drop_table('transkribus_htr_models') op.drop_table('tesseract_ocr_models') op.drop_table('jobs') op.drop_table('corpora') diff --git a/nopaque.py b/nopaque.py index 243b915d0b253f4449727ac74d047dfff7f77b1d..0f2d0d2a0eded53d880e7ec95c301632c17ccda8 100644 --- a/nopaque.py +++ b/nopaque.py @@ -13,6 +13,7 @@ from app.models import ( Permission, Role, TesseractOCRModel, + TranskribusHTRModel, User ) # noqa from flask import Flask # noqa @@ -42,5 +43,6 @@ def make_shell_context() -> Dict[str, Any]: 'Permission': Permission, 'Role': Role, 'TesseractOCRModel': TesseractOCRModel, + 'TranskribusHTRModel': TranskribusHTRModel, 'User': User }