HTTP 200 OK
Allow: GET, POST, OPTIONS
Content-Type: application/json
Vary: Accept
{
"count": 7867,
"next": "https://metax.fairdata.fi/v3/datasets?format=api&limit=20&offset=7380",
"previous": "https://metax.fairdata.fi/v3/datasets?format=api&limit=20&offset=7340",
"results": [
{
"id": "660ddd3b-55d5-494f-8a87-69a960942e19",
"access_rights": {
"id": "2b51f82c-b4f2-472f-b107-b8716174a397",
"license": [
{
"id": "d11e207f-2242-448c-86dd-47b1920bdbd8",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaRes",
"title": {
"en": "CLARIN RES (Restricted) End-User License 1.0",
"und": "CLARIN RES (Restricted) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "dfb38790-f105-4541-99fb-3a0950753a81",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "b95cd02c-5447-4e44-9dc9-7e2c796e8ad8",
"name": "Jussi Niemi",
"email": "<hidden>"
},
"organization": {
"id": "636b6a56-8991-467d-a399-de5a6347cb02",
"pref_label": {
"en": "University of Eastern Finland"
},
"homepage": {
"url": "http://www.uef.fi/uef/english"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Computer corpus of spoken language output of a child acquiring Finnish (age 2;4 to 6;7)\n\nabout 6000 word tokens in total, about 6 person months\n\nRelevant publication(s) using the corpus: Niemi, Jussi & Sinikka Niemi: Acquisition of inflectional marking: A case study of Finnish. Nordic Journal of Linguistics 10: 59-89 (1987).\n\nlog\n25.11.2018 http://islrn.org/resources/955-494-800-981-0 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141416827900",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730132",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Joensuu Language Acquisition Corpus"
},
"created": "2018-06-19T12:43:05Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "660ddd3b-55d5-494f-8a87-69a960942e19",
"title": {
"en": "Joensuu Language Acquisition Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730132",
"state": "published",
"created": "2018-06-19T12:43:05Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "1ebe69d9-61f5-4105-8bd3-b835ccd27e05",
"access_rights": {
"id": "6763bf63-43ae-408b-978b-0854a81ad5f9",
"license": [
{
"id": "29323037-2644-45f6-9b44-d9d2a9375c26",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaAcaNCDep",
"title": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0",
"und": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "e4101291-047a-4abb-8e9c-e6462cd43142",
"roles": [
"creator",
"curator"
],
"organization": {
"id": "dc50b359-8ad9-4426-815b-51eacfbdd51a",
"pref_label": {
"en": "University of Helsinki"
},
"homepage": {
"url": "http://www.helsinki.fi/fus/"
},
"email": "<hidden>"
}
},
{
"id": "3e837da3-06d6-4690-a50f-43d6ee993c0e",
"roles": [
"creator",
"curator"
],
"organization": {
"id": "5c3351a0-9689-4b7e-a309-9c58afe5b194",
"pref_label": {
"en": "Institute for the Languages of Finland",
"fi": "Kotimaisten kielten keskus"
},
"homepage": {
"url": "http://www.kotus.fi"
},
"email": "<hidden>"
}
},
{
"id": "fbf2a921-9bfb-406f-b77e-9efacc6c6fdb",
"roles": [
"curator"
],
"person": {
"id": "980cc6c4-3c87-4e17-950b-327dba776dab",
"name": "Hanna Lappalainen",
"email": "<hidden>"
},
"organization": {
"id": "4d8ef5f0-9586-4e72-856f-b733974fbeee",
"pref_label": {
"en": "University of Helsinki"
},
"homepage": {
"url": "http://www.helsinki.fi/fus/index.htm"
},
"email": "<hidden>"
}
},
{
"id": "8bc53107-cb5f-4d2d-8c44-3be8d4ed1c7f",
"roles": [
"curator"
],
"organization": {
"id": "1a7bba1a-9c10-42bd-8d3e-036bf6888c5f",
"pref_label": {
"en": "University of Helsinki"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This corpus is a subset of The Longitudinal Corpus of Finnish Spoken in Helsinki, which contains interviews with people of different ages born in Helsinki. The Longitudinal Corpus of Finnish Spoken in Helsinki (2010s text corpus) contains the anonymized transcripts of the 2010s interviews.\n\nThe 2010s material is based on about one hour long audio recordings of individual interviews. Most of the interview questions are about how the interviewees perceive Helsinki, living and traveling in the city, as well as the languages and language forms spoken in Helsinki. The interviews also touch upon such topics as school, work and hobbies related issues of the interviewees.\nA part of the corpus has been transcribed and thematically coded. The Longitudinal Corpus of Finnish Spoken in Helsinki (2010s text corpus) contains also parts in which the audio material and the transcript are aligned.\nWork on the transcription, alignment and thematic coding of the corpus is planned to continue in the future.\n\n\nThe corpus should be referred to in the following way:\n\nThe Longitudinal Corpus of Finnish Spoken in Helsinki (2010s text corpus), informant’s code (if applicable).\n\nThe informant’s code should be marked if concrete text examples of the corpus are given.\n\n\nThe corpus will be published in Korp (https://korp.csc.fi).\n\nThis corpus can be used for educational and research purposes. For detailed information on the license of the resource see https://www.kielipankki.fi/support/clarin-eula/#aca.\nHowever, in order to use the audio files, a personal permission is required, see The Longitudinal Corpus of Finnish Spoken in Helsinki (1970s, 1990s, 2010s).\n\nlog\n25.11.2018 link http://islrn.org/resources/762-219-722-630 removed",
"fi": "Tämä korpus sisältää Helsingin puhekielen pitkittäiskorpuksen 2010-luvun anonymisoidun tekstimuotoisen osa-aineiston, jota voidaan käyttää koulutus-, tutkimus- ja opetustarkoituksiin. \n\nAlkuperäinen Helsingin puhekielen pitkittäiskorpus sisältää eri-ikäisten syntyperäisten helsinkiläisten haastatteluja. 2010-luvun aineisto koostuu noin tunnin mittaisista äänitallennetuista yksilöhaastatteluista. Niissä on kyselty etenkin haastateltavien suhteesta Helsinkiin, siellä asumisesta ja liikkumisesta sekä Helsingissä puhuttavista kielistä ja kielimuodoista. Muita teemoja ovat olleet mm. haasteltavien koulunkäynti, työ ja harrastukset.\n\nOsa aineistosta on litteroitu ja teemakoodattu; osassa ääni ja litteraatti on kohdennettu toisiinsa.\nAineiston litterointia, kohdennusta ja temaattista koodausta pyritään jatkamaan.\n\n\nAineistoon viitataan seuraavasti:\nHelsingin puhekielen pitkittäiskorpus, 2010-luvun tekstiaineisto, (tarvittaessa) informantin koodi\nInformantin koodi olisi syytä merkitä silloin, jos esittää aineistosta konkreettisia tekstiesimerkkejä.\n\nKorpus julkaistaan Korp-palvelussa, https://korp.csc.fi.\n\nTätä tekstimuotoista korpusta voidaan käyttää koulutus-, tutkimus- ja opetustarkoituksiin. \n\nHuom. Ääniaineiston käyttäminen edellyttää henkilökohtaista käyttölupaa, ks. Helsingin puhekielen pitkittäiskorpus (1970, 1990, 2010).\n\n\nlog\n25.11.2018 linkki http://islrn.org/resources/762-219-722-630 poistettu"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141512400292",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-2014073040",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [
{
"start_date": "2013-01-01",
"end_date": "2013-12-31"
}
],
"theme": [],
"title": {
"en": "The Longitudinal Corpus of Finnish Spoken in Helsinki (2010s Text Corpus)",
"fi": "Helsingin puhekielen pitkittäiskorpus (2010-luvun tekstimuotoinen aineisto)"
},
"created": "2018-06-19T12:40:40Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "1ebe69d9-61f5-4105-8bd3-b835ccd27e05",
"title": {
"en": "The Longitudinal Corpus of Finnish Spoken in Helsinki (2010s Text Corpus)",
"fi": "Helsingin puhekielen pitkittäiskorpus (2010-luvun tekstimuotoinen aineisto)"
},
"persistent_identifier": "urn:nbn:fi:lb-2014073040",
"state": "published",
"created": "2018-06-19T12:40:40Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "d6588a3e-6cd4-4c4d-beab-78041bb5ac25",
"access_rights": {
"id": "2bc9c91b-f327-422f-b30d-dbaa0a9be967",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "ddfb5f33-e391-4527-9a21-c9034d2d733d",
"roles": [
"creator",
"curator"
],
"organization": {
"id": "4bf7fa48-163a-42a8-8542-92c4543aef11",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
},
{
"id": "b3d908a2-d01b-414a-a2d9-6b6613b688f5",
"roles": [
"curator"
],
"person": {
"id": "91abc1b3-793f-4203-a721-6550c1d0a28f",
"name": "Leila Kääntä",
"email": "<hidden>"
},
"organization": {
"id": "4bf7fa48-163a-42a8-8542-92c4543aef11",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Recordings in .vow format of two weeks of Big Brother Finland 2006, recorded weeks: 4th and 13th.\n\nlog\n26.11.2018 link http://islrn.org/resources/344-203-337-923-3 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141555634890",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-201407308",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Reality TV Corpus"
},
"created": "2018-06-19T12:37:22Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "d6588a3e-6cd4-4c4d-beab-78041bb5ac25",
"title": {
"en": "Reality TV Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-201407308",
"state": "published",
"created": "2018-06-19T12:37:22Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "86cc8451-168b-4ca0-89f7-cc8d61cebccb",
"access_rights": {
"id": "1b9643ca-9a8c-4796-901f-a0fbfa430011",
"license": [
{
"id": "6142d0c6-945d-4085-b9f7-81e52afa253b",
"title": {
"en": "Other",
"fi": "Muu",
"und": "Muu"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Other",
"fi": "Muu"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "b06294e4-9c97-4bfb-88e3-c3a9bc5b2425",
"roles": [
"curator"
],
"person": {
"id": "9e92e2e0-ce1b-4805-ad85-f90381cb9d05",
"name": "Mikko Kurimo",
"email": "<hidden>"
},
"organization": {
"id": "673eb0e9-71ef-40af-ae58-6e83a74548cd",
"pref_label": {
"en": "Aalto University"
},
"homepage": {
"url": "http://ics.aalto.fi/"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Speech corpus collected from YouTube News."
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "5068bdfc-8176-4b93-b23b-c94ba4fe21de",
"url": "http://lexvo.org/id/iso639-3/fra",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "French",
"fi": "ranska",
"sv": "franska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2020041701",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "YTN-Aalto2019"
},
"created": "2020-04-19T01:19:57Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "86cc8451-168b-4ca0-89f7-cc8d61cebccb",
"title": {
"en": "YTN-Aalto2019"
},
"persistent_identifier": "urn:nbn:fi:lb-2020041701",
"state": "published",
"created": "2020-04-19T01:19:57Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "333c2d1b-2c86-4746-9134-d6d419d45727",
"access_rights": {
"id": "3ce35893-1fbc-4057-bf44-4f4166f9b7c5",
"license": [
{
"id": "d11e207f-2242-448c-86dd-47b1920bdbd8",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaRes",
"title": {
"en": "CLARIN RES (Restricted) End-User License 1.0",
"und": "CLARIN RES (Restricted) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "26ac0259-3335-47f8-b731-6c7fa8c87b8b",
"roles": [
"creator",
"publisher"
],
"person": {
"id": "f2f5ffde-3f54-4c31-a8ed-a3838fc87532",
"name": "Jussi Niemi",
"email": "<hidden>"
},
"organization": {
"id": "3ad133e0-b317-44cc-a516-a42fb06f9668",
"pref_label": {
"en": "University of Eastern Finland"
},
"homepage": {
"url": "http://www.uef.fi/uef/english"
},
"email": "<hidden>"
}
},
{
"id": "3a3d0a61-f0e9-4fb8-b07e-79f3bf5c34e7",
"roles": [
"curator"
],
"person": {
"id": "0c96fc15-b780-4d97-9594-a6ef002ca3ba",
"name": "User support FIN-CLARIN",
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Computer corpora of semi-spontaneous speech of two Finnish Wernicke aphasics (one's transcriptions with English morphological interlinears and translations).\n\nabout 20000 word tokens in total, about 14 person months\n\nRelevant publication(s) using the corpus: Niemi, Jussi & Matti Laine: Syntax and Inflectional Morphology in Aphasia: Quantitative Aspects of Wernicke Speakers' Narratives. Journal of Quantitative Linguistics 4: 181- 189 (1997).\n\nlog\n25.11.2018 link http://islrn.org/resources/975-012-583-606-6 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141357851489",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730121",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Joensuu Wernicke Aphasia Corpora"
},
"created": "2018-06-19T12:37:28Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "333c2d1b-2c86-4746-9134-d6d419d45727",
"title": {
"en": "Joensuu Wernicke Aphasia Corpora"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730121",
"state": "published",
"created": "2018-06-19T12:37:28Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "d74085cd-496e-42d7-a29c-3cf2a78eb45e",
"access_rights": {
"id": "f908466b-5768-47d9-a678-24cea4f80794",
"license": [
{
"id": "29323037-2644-45f6-9b44-d9d2a9375c26",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaAcaNCDep",
"title": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0",
"und": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "e6ade4af-06cc-4cbd-bd49-0625f7ff4692",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "3a9d317f-1284-41be-bdac-ec521adad7c0",
"name": "Heini Kallio",
"email": "<hidden>"
},
"organization": {
"id": "7edc22c2-6e79-4671-80dd-396acf42029b",
"pref_label": {
"en": "University of Helsinki"
},
"homepage": {
"url": "http://www.helsinki.fi/speechsciences/"
},
"email": "<hidden>"
}
},
{
"id": "9ea692f1-d5d9-4272-be7a-3efa7e8182a4",
"roles": [
"creator",
"curator"
],
"person": {
"id": "8de5d7b3-ebea-4333-9102-1c45ef864122",
"name": "Martti Vainio",
"email": "<hidden>",
"external_identifier": "http://orcid.org/0000-0003-2570-0196"
},
"organization": {
"id": "91f49eac-fa39-486b-b1fd-3d655bd6bfe1",
"pref_label": {
"en": "University of Helsinki"
},
"homepage": {
"url": "http://www.helsinki.fi/speechsciences/staff/index.htm"
},
"email": "<hidden>"
}
},
{
"id": "80d3d223-deef-452b-97e2-a0d3d904be7a",
"roles": [
"curator"
],
"organization": {
"id": "15f18503-2661-4ab7-aa4d-a7a0981762fc",
"pref_label": {
"en": "University of Helsinki"
},
"email": "<hidden>"
}
},
{
"id": "4c431ba8-bc2c-4c0c-9eba-1532f9044cb5",
"roles": [
"curator"
],
"organization": {
"id": "b71ad3c3-3316-436d-a696-c79486dd41e4",
"pref_label": {
"en": "CSC — IT Center for Science Ltd",
"fi": "CSC - Tieteen tietotekniikan keskus Oy"
},
"homepage": {
"url": "http://www.csc.fi/english"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The PERSO speech database is the result of two research projects: Personalized Hidden Markov Modeling –based Text-To-Speech Synthesis: Assistive Technology for People with Communication Disabilities (funded by Tekes) and Adaptive spatial model for vocal expression: emotional speech synthesis for Finnish (funded by Academy of Finland). The projects were carried out between 2009 and 2012. The speech database was collected in 2011 and 2012. These projects are followed by the international Simple4All speech synthesis project funded by EU.\n\nThe purpose of these projects is to create more appropriate speech synthesis options for TTS (Text-To-Speech) conversion applications like assistive communication devices. Speech synthesis products are often generic and their application possibilities are narrow, so there’s a clear need for wider range of synthesis voices and styles. The PERSO corpus consists of single speaker databases with Finnish read and spontaneous speech from 33 men and 33 women, 60 of which are smaller (~ 40 minutes continuous speech/subject) and 6 larger (~ 4 hours of continuous speech/subject) databases. The speech data are packaged with associated text files.\n\nThe PERSO corpus will be published at https://lat.csc.fi/ for non-commercial scientific use only.\n\nFor detailed information on the license of the resource see https://www.kielipankki.fi/support/clarin-eula/#aca.\n\nMore information on the corpus: http://blogs.helsinki.fi/phonetics/category/projects/perso/\n\nlog\n26.11.2018 link http://islrn.org/resources/651-108-565-673-5 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141355783497",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-2014073053",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "PERSO Databases for Finnish Speech Synthesis"
},
"created": "2018-06-19T12:37:15Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "d74085cd-496e-42d7-a29c-3cf2a78eb45e",
"title": {
"en": "PERSO Databases for Finnish Speech Synthesis"
},
"persistent_identifier": "urn:nbn:fi:lb-2014073053",
"state": "published",
"created": "2018-06-19T12:37:15Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "4eb5a13e-bf9e-489c-b108-7bf15cf08a7a",
"access_rights": {
"id": "6c52516c-2820-4569-96b2-6fc3a6476a11",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "3b846f22-d888-4daa-ba1e-fe6a2eb140a9",
"roles": [
"creator",
"curator"
],
"organization": {
"id": "e7bcc83f-7bfa-4194-9ace-908e57b1997c",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
},
{
"id": "a1658843-310d-468f-8a12-a96c0781d6a8",
"roles": [
"curator"
],
"person": {
"id": "b364de9e-daf5-4662-8177-5af12fa2ffc4",
"name": "Leila Kääntä",
"email": "<hidden>"
},
"organization": {
"id": "e7bcc83f-7bfa-4194-9ace-908e57b1997c",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Weblogs written by Finns as HT track files.\n\nlog \n26.11.2018 link http://islrn.org/resources/493-088-202-360-5 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "ec748146-3403-4a7f-adfe-bdbb1b889372",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141259718418",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730109",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Weblog Corpus"
},
"created": "2018-06-19T12:39:02Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "4eb5a13e-bf9e-489c-b108-7bf15cf08a7a",
"title": {
"en": "Weblog Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730109",
"state": "published",
"created": "2018-06-19T12:39:02Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "ac9fa3b3-4c1d-4a87-b917-4a060eec2b50",
"access_rights": {
"id": "f13e1dba-5fab-49f4-859a-3a6217c8c7a1",
"license": [
{
"id": "29323037-2644-45f6-9b44-d9d2a9375c26",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaAcaNCDep",
"title": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0",
"und": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "77a4dcb4-4249-41fc-8f89-dbfafba6a494",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "4a9b2f23-3457-422a-b2b7-1e5be27bb2f8",
"name": "Larisa Leisiö",
"email": "<hidden>"
},
"organization": {
"id": "0c1aa474-901b-44b2-8a70-544cc1d2f842",
"pref_label": {
"en": "University of Tampere"
},
"homepage": {
"url": "http://www.uta.fi/ltl/en/index.html"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The corpus contains video and audio recordings from 1986-2013 of fairy tales, songs, biographies, recollections and stories, as well as discussions on everyday issues in Nganasan and their linguistic transcripts. The corpus contains also photographs.\n\nThe Nganasan Speech Corpus will be made available in LAT (https://lat.csc.fi).\n\nMore license information: https://www.kielipankki.fi/lic/aca_fin_0-91/.",
"fi": "Korpus sisältää nganasaninkielisiä videoita ja äänitteitä, joissa satuja, lauluja, elämäkertoja, muisteluksia ja tarinoita sekä keskusteluja arkiasioista, ajalta 1986-2013 ja niiden kielitieteellisiä litterointeja. Korpus sisältää myös valokuvia.\n\nNganasanin kielen puhekorpus julkaistaan LATissa (https://lat.csc.fi).\n\nLisätietoa kielivaran lisenssistä: https://www.kielipankki.fi/lic/aca_fin_0-91/."
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "4e50c815-9511-42fe-8d99-c593fc6106df",
"url": "http://lexvo.org/id/iso639-3/nio",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Nganasan",
"fi": "Nganasanin kieli",
"sv": "Nganasaniska"
}
},
{
"id": "7cdd1202-69d2-415e-9361-e9b5e5f6beef",
"url": "http://lexvo.org/id/iso639-3/rus",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Russian",
"fi": "venäjä",
"sv": "ryska"
}
},
{
"id": "ec748146-3403-4a7f-adfe-bdbb1b889372",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141606575410",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-2014100302",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Nganasan Speech Corpus",
"fi": "Nganasanin kielen puhekorpus"
},
"created": "2018-06-19T12:41:20Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "ac9fa3b3-4c1d-4a87-b917-4a060eec2b50",
"title": {
"en": "Nganasan Speech Corpus",
"fi": "Nganasanin kielen puhekorpus"
},
"persistent_identifier": "urn:nbn:fi:lb-2014100302",
"state": "published",
"created": "2018-06-19T12:41:20Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "6c14abee-f865-4ec6-9df4-05efa9fc2e87",
"access_rights": {
"id": "8fec0e1f-4c55-49fe-9a7e-fbca6a43d09e",
"license": [
{
"id": "d11e207f-2242-448c-86dd-47b1920bdbd8",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaRes",
"title": {
"en": "CLARIN RES (Restricted) End-User License 1.0",
"und": "CLARIN RES (Restricted) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "a1fc5e99-c504-429c-a8de-1692b3404fc7",
"roles": [
"creator",
"publisher"
],
"person": {
"id": "04dad383-af2c-44c6-822c-33cf91b5185d",
"name": "Jussi Niemi",
"email": "<hidden>"
},
"organization": {
"id": "96c47281-84a0-404b-b698-3575ad3c2506",
"pref_label": {
"en": "University of Eastern Finland"
},
"homepage": {
"url": "http://www.uef.fi/uef/english"
},
"email": "<hidden>"
}
},
{
"id": "980e4b5e-c1ee-42c1-b367-81640df7318c",
"roles": [
"curator"
],
"person": {
"id": "f5f3ae5a-653b-4108-a48e-26d7961ca4ae",
"name": "User support FIN-CLARIN",
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Computer corpora of semi-spontaneous speech of two Finnish agrammatic (Broca) aphasics (with English morphological interlinears and translations).\n\nabout 2000 word tokens in total, about 14 person months\n\nRelevant publication(s) using the corpus: Niemi, Jussi, Matti Laine, Ritva Hänninen & Päivi Koivuselkä- Sallinen: Agrammatism in Finnish: Two Case Studies. In: L. Menn & L. K. Obler (eds.): Agrammatic Aphasia: A Cross-Language Narrative Sourcebook. Pp. 1013 - 1085. Benjamins, Amsterdam 1990. Supplement to Chapter 14 - Finnish-Language Materials: Control Subjects, pp. 1775-1818.\n\nlog\n25.11.2018 link http://islrn.org/resources/163-858-701-729-9 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141326591726",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730128",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Joensuu Agrammatic Aphasia Corpus"
},
"created": "2018-06-19T12:43:12Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "6c14abee-f865-4ec6-9df4-05efa9fc2e87",
"title": {
"en": "Joensuu Agrammatic Aphasia Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730128",
"state": "published",
"created": "2018-06-19T12:43:12Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "0c338a75-2821-49cb-ab8a-c7f37a382afd",
"access_rights": {
"id": "8750a2d7-6ecf-45a9-8486-d8dc5d8f15eb",
"license": [
{
"id": "d11e207f-2242-448c-86dd-47b1920bdbd8",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaRes",
"title": {
"en": "CLARIN RES (Restricted) End-User License 1.0",
"und": "CLARIN RES (Restricted) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "6a872886-1aae-4bf5-aeda-c7cc8dea0c3c",
"roles": [
"creator",
"curator"
],
"organization": {
"id": "4cbd30e9-9fd9-45d6-9d37-4b86b0ca7e30",
"pref_label": {
"en": "University of Eastern Finland"
},
"homepage": {
"url": "http://www.uef.fi/uef/english"
},
"email": "<hidden>"
}
},
{
"id": "52d3dc62-47b6-4ed1-8cba-789bdc52e095",
"roles": [
"curator"
],
"person": {
"id": "4d64fb6e-fdc0-466e-b571-81ec3e025a8e",
"name": "Sinikka Niemi",
"email": "<hidden>"
},
"organization": {
"id": "4cbd30e9-9fd9-45d6-9d37-4b86b0ca7e30",
"pref_label": {
"en": "University of Eastern Finland"
},
"homepage": {
"url": "http://www.uef.fi/uef/english"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Computer corpus (list) of Swedish compounds in Göteborgs-Posten (a Swedish newspaper) data-base of 24.2 million word tokens originally collected by Elisabeth Ahlsén (Linguistics, Göteborg University) and eventually morphologically tagged by Matti Laine’s and Patrick Virtanen’s WordMill Lexical Search program (Center for Cognitive Neuroscience, U. Turku).\n\nabout 3800 compound tokens, with their WordMill variables (incl. frequency of use in the Göteborgs-Posten), about 3 person months\n\nRelevant publication(s) using the corpus: S. Niemi: Compounds in Swedish. Lingue e Linguaggio 8: 257-269. Part of cross-linguistic study of compounds, co-ordinated by Sergio Scalise (Linguistics, U. Bologna), see http://morbo.lingue.unibo.it/mmm/enlm.php\n\nlog\n25.11.2018 link http://islrn.org/resources/128-829-996-277-5 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-04-17",
"keyword": [],
"language": [
{
"id": "3c34537c-26bf-4c12-95b6-a3a84aad11c6",
"url": "http://lexvo.org/id/iso639-3/swe",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Swedish",
"fi": "ruotsi",
"sv": "svenska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141327847251",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730122",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Joensuu Corpus of Swedish Compounds"
},
"created": "2018-06-19T12:41:27Z",
"modified": "2020-04-17T00:00:00Z",
"dataset_versions": [
{
"id": "0c338a75-2821-49cb-ab8a-c7f37a382afd",
"title": {
"en": "Joensuu Corpus of Swedish Compounds"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730122",
"state": "published",
"created": "2018-06-19T12:41:27Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "80fa69af-addb-4f9a-b45c-c16011bae366",
"access_rights": {
"id": "e3d451d6-036b-46da-9e82-bbd90c0f2b13",
"license": [
{
"id": "35b18e72-3819-4ec6-a1be-14624f29d968",
"custom_url": "https://creativecommons.org/licenses/by/4.0/",
"title": {
"en": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
"fi": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)",
"und": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC-BY-4.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
"fi": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)"
}
}
],
"access_type": {
"id": "d01ac02c-fc70-4c68-9434-8383cb693ff0",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": [],
"show_file_metadata": true
},
"actors": [
{
"id": "81986319-c723-40ac-9765-3a4c097c8ba0",
"roles": [
"creator",
"publisher"
],
"person": {
"id": "bb4b6486-ebac-4b3d-a2dc-4081ed5bac5f",
"name": "Sami Kairajärvi",
"email": "<hidden>"
},
"organization": {
"id": "f2de2fbb-51b3-4f42-9f72-ff16c225e817",
"pref_label": {
"en": "Informaatioteknologian tiedekunta",
"fi": "Informaatioteknologian tiedekunta",
"sv": "Informaatioteknologian tiedekunta",
"und": "Informaatioteknologian tiedekunta"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906-217000",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization",
"parent": {
"id": "d2e33900-ae70-494e-9eea-d9c9c5667d92",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
},
{
"id": "0bbf03c1-f05a-480e-bb41-107f6c4cc1f0",
"roles": [
"creator"
],
"person": {
"id": "bbc856a5-a42a-4482-a8e4-e44deea58c6c",
"name": "Andrei Costin",
"email": "<hidden>",
"external_identifier": "http://orcid.org/0000-0002-2704-9715"
},
"organization": {
"id": "f2de2fbb-51b3-4f42-9f72-ff16c225e817",
"pref_label": {
"en": "Informaatioteknologian tiedekunta",
"fi": "Informaatioteknologian tiedekunta",
"sv": "Informaatioteknologian tiedekunta",
"und": "Informaatioteknologian tiedekunta"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906-217000",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization",
"parent": {
"id": "d2e33900-ae70-494e-9eea-d9c9c5667d92",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
},
{
"id": "c6f71b45-df74-47fb-80db-e1fbaf535b0c",
"roles": [
"contributor"
],
"person": {
"id": "708a5421-0d4a-4882-92ec-c3b0184b31d0",
"name": "Timo Hämäläinen",
"email": "<hidden>",
"external_identifier": "http://orcid.org/0000-0002-4168-9102"
},
"organization": {
"id": "d2e33900-ae70-494e-9eea-d9c9c5667d92",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-ida",
"description": {
"en": "This repository holds two datasets: one with both the original binaries and the code sections extracted from them (“full dataset”), and one with only the code sections (“only code sections”). The code sections were extracted by carving out sections of the binary that were marked as executable. The binaries were scraped from Debian repositories.\n\nThere are also two CSV files available, one with full binaries and one with only code sections, which include the 293 features extracted from about 3000 binaries per architecture. These features can be used to train classifiers.\n\nThe dataset consists of thousands of binaries for the following 23 architectures: alpha, amd64, arm64, armel, armhf, hppa, i386, ia64, m68k, mips, mips64el, mipsel, powerpc, powerpcspe, powerpc64, powerpc64el, riscv, s390, s390x, sh4, sparc, sparc64 and x32.\n\nThere are 98 500 binary files, about 27 gigabytes (uncompressed) of binary files and about 15 gigabytes (uncompressed) of only code sections from those binary files.\n\nBoth datasets hold the binaries in directories named by the architecture. The files inside the folders are named as MD5 hashes of the original binary files, and a hash file ending with “.code” contains only the concatenation of all code sections of the original binary file. Each architecture folder also holds a JSON file named after the architecture, e.g. amd64 holds amd64.json. The structure of the JSON file is as follows (described in a JSON Schema-like notation):\n\n \"architecture\": {\n\n \"type\": \"string\",\n \"description\": \"Name of the architecture\"\n },\n \"code_sections\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n },\n \"description\": \"Names of the code sections that were used\"\n },\n \"endianness\": {\n \"type\": \"string\",\n \"enum\": [\n \"big\",\n \"little\"\n ],\n \"description\": \"Endianness of the binary file\"\n },\n \"filehash\": {\n \"type\": \"string\",\n \"description\": \"MD5 hash of the original binary file\"\n },\n \"fileinfo\": {\n \"type\": \"string\",\n \"description\": \"Output of running Linux 'file' command on the original binary file\"\n },\n \"filename\": {\n \"type\": \"string\",\n \"description\": \"Path where the binary file was located in the Debian package\"\n },\n \"filesize\": {\n \"type\": \"integer\",\n \"decription\": \"File size of the original binary file in bytes\"\n },\n \"wordsize\": {\n \"type\": \"ingeter\",\n \"description\": \"Wordsize of the binary file\"\n },\n \"deb_package\": {\n \"type\": \"string\",\n \"description\": \"Name of the debian package where the binary file was extracted from\"\n },\n \"only_code\": {\n \"type\": \"string\",\n \"description\": \"Name of the file which holds only the executable code sections of the original binary file. Should be the md5 sum with .code extension\"\n },\n \"only_code_size\": {\n \"type\": \"integer\",\n \"description\": \"File size of the 'only code' file in bytes\"\n }\n\nThis work is based on work by \nJohn Clemens, 2015, “Automatic classification of object code using machine learning” and\nDe Nicolao, Pietro et al., 2018, “ELISA: ELiciting ISA of Raw Binaries for Fine-Grained Code and Data Separation”\n\nThis dataset is released as part of the following papers:\n\nSami Kairajärvi, Andrei Costin, and Timo Hämäläinen. 2020. ISAdetect: Usable automated detection of ISA (CPU architecture and endianness) for executable binary files and object code. In Tenth ACM Conference on Data and Application Security and Privacy (CODASPY’20), March 16–18, 2020, New Orleans, LA, USA. ACM, New York, NY, USA, 5 pages. https://doi.org/10.1145/3374664.3375742\n\nKairajärvi, Sami, Andrei Costin, and Timo Hämäläinen. \"Towards usable automated detection of CPU architecture and endianness for arbitrary binary files and object code sequences.\" arXiv preprint arXiv:1908.05459 (2019).\n\nKairajärvi, Sami. \"Automatic identification of architecture and endianness using binary file contents.\" (2019).\n\nThe code associated with this dataset can be found at https://github.com/kairis/isadetect\n\n\nChangelog:\nversion 6 - 29.3.2020\n- Add Weka models \n\nversion 5 - 17.1.2020\n- Clean up dataset\n\nversion 4 - 13.1.2020\n- Initial release"
},
"field_of_science": [
{
"id": "33d291b9-9b23-4192-b878-cffc210af1d3",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta113",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Computer and information sciences",
"fi": "Tietojenkäsittely ja informaatiotieteet",
"sv": "Data- och informationsvetenskap"
}
}
],
"fileset": {
"storage_service": "ida",
"csc_project": "2001375",
"total_files_count": 14,
"total_files_size": 22535906358
},
"infrastructure": [],
"issued": "2020-03-29",
"keyword": [
"Object code",
"Binary code",
"Binary file",
"Binary code analysis",
"Firmware analysis",
"Instruction Set Architecture (ISA)",
"Supervised machine learning",
"Reverse engineering",
"Malware analysis",
"Digital forensics"
],
"language": [],
"metadata_owner": {
"id": "8db456a1-f79a-4b88-ab57-4dd8e0d291f3",
"organization": "jyu.fi"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:att:d58324bd-1cf9-49cf-99cd-5bc2ba781e38",
"pid_generated_by_fairdata": true,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"created": "2020-03-29T08:29:03Z",
"modified": "2020-03-29T08:29:03Z",
"dataset_versions": [
{
"id": "80fa69af-addb-4f9a-b45c-c16011bae366",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:d58324bd-1cf9-49cf-99cd-5bc2ba781e38",
"state": "published",
"created": "2020-03-29T08:29:03Z",
"version": 6
},
{
"id": "9f6203f5-2360-426f-b9df-052f3f936ed2",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:693a3e3a-976a-4eac-8c3d-a4a62619f8b1",
"state": "published",
"created": "2020-01-17T21:35:22Z",
"version": 5
},
{
"id": "ed92408a-7bc6-4061-892f-101f85ee6202",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:3ab499c2-7867-4b16-9aef-c57140e0623b",
"state": "published",
"created": "2020-01-13T12:45:30Z",
"version": 4
},
{
"id": "03fe55df-a1bb-4d24-8c8b-eb12eb3b74f1",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:4dbe7751-bab9-47be-85d1-bf535a23b59a",
"state": "published",
"created": "2020-01-05T10:25:52Z",
"removed": "2020-01-13T13:03:28Z",
"version": 3
},
{
"id": "932abedc-3959-4cc6-9c0d-910eaac37117",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:fef41a31-0b7f-4903-8df7-38c65c2455cf",
"state": "published",
"created": "2020-01-05T10:01:57Z",
"removed": "2020-01-05T10:26:09Z",
"version": 2
},
{
"id": "6ac31f30-b12e-4c7d-a736-4b98bcffacd8",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:4840d323-9537-432d-af0a-ce2c6914ab82",
"state": "published",
"created": "2020-01-04T11:57:54Z",
"removed": "2020-01-05T10:26:06Z",
"deprecated": "2020-01-05T10:03:43Z",
"version": 1
}
],
"published_revision": 1,
"version": 6,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "9f6203f5-2360-426f-b9df-052f3f936ed2",
"access_rights": {
"id": "28d5ca27-69f6-47d7-9ea8-b44d942392de",
"license": [
{
"id": "35b18e72-3819-4ec6-a1be-14624f29d968",
"custom_url": "https://creativecommons.org/licenses/by/4.0/",
"title": {
"en": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
"fi": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)",
"und": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC-BY-4.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
"fi": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)"
}
}
],
"access_type": {
"id": "d01ac02c-fc70-4c68-9434-8383cb693ff0",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": [],
"show_file_metadata": true
},
"actors": [
{
"id": "938f923e-81f3-4766-849b-19c4f1843d80",
"roles": [
"creator",
"publisher"
],
"person": {
"id": "8b5a3af9-7a9a-4bff-820e-5ed2d30ed869",
"name": "Sami Kairajärvi",
"email": "<hidden>"
},
"organization": {
"id": "f2de2fbb-51b3-4f42-9f72-ff16c225e817",
"pref_label": {
"en": "Informaatioteknologian tiedekunta",
"fi": "Informaatioteknologian tiedekunta",
"sv": "Informaatioteknologian tiedekunta",
"und": "Informaatioteknologian tiedekunta"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906-217000",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization",
"parent": {
"id": "d2e33900-ae70-494e-9eea-d9c9c5667d92",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
},
{
"id": "fb8eae76-5399-4344-a7ab-2f51b2c8a6a3",
"roles": [
"creator"
],
"person": {
"id": "9b243783-7c70-46a5-9d2a-9bd56caa9ca3",
"name": "Andrei Costin",
"email": "<hidden>",
"external_identifier": "http://orcid.org/0000-0002-2704-9715"
},
"organization": {
"id": "f2de2fbb-51b3-4f42-9f72-ff16c225e817",
"pref_label": {
"en": "Informaatioteknologian tiedekunta",
"fi": "Informaatioteknologian tiedekunta",
"sv": "Informaatioteknologian tiedekunta",
"und": "Informaatioteknologian tiedekunta"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906-217000",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization",
"parent": {
"id": "d2e33900-ae70-494e-9eea-d9c9c5667d92",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
},
{
"id": "94c239d4-5f32-4822-abfc-8ed21b2905fe",
"roles": [
"contributor"
],
"person": {
"id": "292df1c9-7867-4a55-8601-9aca2161b52a",
"name": "Timo Hämäläinen",
"email": "<hidden>",
"external_identifier": "http://orcid.org/0000-0002-4168-9102"
},
"organization": {
"id": "d2e33900-ae70-494e-9eea-d9c9c5667d92",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-ida",
"description": {
"en": "This repository holds two datasets: one with both the original binaries and the code sections extracted from them (“full dataset”), and one with only the code sections (“only code sections”). The code sections were extracted by carving out sections of the binary that were marked as executable. The binaries were scraped from Debian repositories.\n\nThere are also two CSV files available, one with full binaries and one with only code sections, which include the 293 features extracted from about 3000 binaries per architecture. These features can be used to train classifiers.\n\nThe dataset consists of thousands of binaries for the following 23 architectures: alpha, amd64, arm64, armel, armhf, hppa, i386, ia64, m68k, mips, mips64el, mipsel, powerpc, powerpcspe, powerpc64, powerpc64el, riscv, s390, s390x, sh4, sparc, sparc64 and x32.\n\nThere are 98 500 binary files, about 27 gigabytes (uncompressed) of binary files and about 15 gigabytes (uncompressed) of only code sections from those binary files.\n\nBoth datasets hold the binaries in directories named by the architecture. The files inside the folders are named as MD5 hashes of the original binary files, and a hash file ending with “.code” contains only the concatenation of all code sections of the original binary file. Each architecture folder also holds a JSON file named after the architecture, e.g. amd64 holds amd64.json. The structure of the JSON file is as follows (described in a JSON Schema-like notation):\n\n \"architecture\": {\n\n \"type\": \"string\",\n \"description\": \"Name of the architecture\"\n },\n \"code_sections\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n },\n \"description\": \"Names of the code sections that were used\"\n },\n \"endianness\": {\n \"type\": \"string\",\n \"enum\": [\n \"big\",\n \"little\"\n ],\n \"description\": \"Endianness of the binary file\"\n },\n \"filehash\": {\n \"type\": \"string\",\n \"description\": \"MD5 hash of the original binary file\"\n },\n \"fileinfo\": {\n \"type\": \"string\",\n \"description\": \"Output of running Linux 'file' command on the original binary file\"\n },\n \"filename\": {\n \"type\": \"string\",\n \"description\": \"Path where the binary file was located in the Debian package\"\n },\n \"filesize\": {\n \"type\": \"integer\",\n \"decription\": \"File size of the original binary file in bytes\"\n },\n \"wordsize\": {\n \"type\": \"ingeter\",\n \"description\": \"Wordsize of the binary file\"\n },\n \"deb_package\": {\n \"type\": \"string\",\n \"description\": \"Name of the debian package where the binary file was extracted from\"\n },\n \"only_code\": {\n \"type\": \"string\",\n \"description\": \"Name of the file which holds only the executable code sections of the original binary file. Should be the md5 sum with .code extension\"\n },\n \"only_code_size\": {\n \"type\": \"integer\",\n \"description\": \"File size of the 'only code' file in bytes\"\n }\n\nThis work is based on work by \nJohn Clemens, 2015, “Automatic classification of object code using machine learning” and\nDe Nicolao, Pietro et al., 2018, “ELISA: ELiciting ISA of Raw Binaries for Fine-Grained Code and Data Separation”\n\nThis dataset is released as part of the following papers:\n\nSami Kairajärvi, Andrei Costin, and Timo Hämäläinen. 2020. ISAdetect: Usable automated detection of ISA (CPU architecture and endianness) for executable binary files and object code. In Tenth ACM Conference on Data and Application Security and Privacy (CODASPY’20), March 16–18, 2020, New Orleans, LA, USA. ACM, New York, NY, USA, 5 pages. https://doi.org/10.1145/3374664.3375742\n\nKairajärvi, Sami, Andrei Costin, and Timo Hämäläinen. \"Towards usable automated detection of CPU architecture and endianness for arbitrary binary files and object code sequences.\" arXiv preprint arXiv:1908.05459 (2019).\n\nKairajärvi, Sami. \"Automatic identification of architecture and endianness using binary file contents.\" (2019).\n\nThe code associated with this dataset can be found at https://github.com/kairis/isadetect"
},
"field_of_science": [
{
"id": "33d291b9-9b23-4192-b878-cffc210af1d3",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta113",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Computer and information sciences",
"fi": "Tietojenkäsittely ja informaatiotieteet",
"sv": "Data- och informationsvetenskap"
}
}
],
"fileset": {
"storage_service": "ida",
"csc_project": "2001375",
"total_files_count": 4,
"total_files_size": 22169432525
},
"infrastructure": [],
"issued": "2020-03-29",
"keyword": [
"Object code",
"Binary code",
"Binary file",
"Binary code analysis",
"Firmware analysis",
"Instruction Set Architecture (ISA)",
"Supervised machine learning",
"Reverse engineering",
"Malware analysis",
"Digital forensics"
],
"language": [],
"metadata_owner": {
"id": "8db456a1-f79a-4b88-ab57-4dd8e0d291f3",
"organization": "jyu.fi"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:att:693a3e3a-976a-4eac-8c3d-a4a62619f8b1",
"pid_generated_by_fairdata": true,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"created": "2020-01-17T21:35:22Z",
"modified": "2020-03-29T08:29:03Z",
"dataset_versions": [
{
"id": "80fa69af-addb-4f9a-b45c-c16011bae366",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:d58324bd-1cf9-49cf-99cd-5bc2ba781e38",
"state": "published",
"created": "2020-03-29T08:29:03Z",
"version": 6
},
{
"id": "9f6203f5-2360-426f-b9df-052f3f936ed2",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:693a3e3a-976a-4eac-8c3d-a4a62619f8b1",
"state": "published",
"created": "2020-01-17T21:35:22Z",
"version": 5
},
{
"id": "ed92408a-7bc6-4061-892f-101f85ee6202",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:3ab499c2-7867-4b16-9aef-c57140e0623b",
"state": "published",
"created": "2020-01-13T12:45:30Z",
"version": 4
},
{
"id": "03fe55df-a1bb-4d24-8c8b-eb12eb3b74f1",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:4dbe7751-bab9-47be-85d1-bf535a23b59a",
"state": "published",
"created": "2020-01-05T10:25:52Z",
"removed": "2020-01-13T13:03:28Z",
"version": 3
},
{
"id": "932abedc-3959-4cc6-9c0d-910eaac37117",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:fef41a31-0b7f-4903-8df7-38c65c2455cf",
"state": "published",
"created": "2020-01-05T10:01:57Z",
"removed": "2020-01-05T10:26:09Z",
"version": 2
},
{
"id": "6ac31f30-b12e-4c7d-a736-4b98bcffacd8",
"title": {
"en": "ISAdetect binary file and object code dataset"
},
"persistent_identifier": "urn:nbn:fi:att:4840d323-9537-432d-af0a-ce2c6914ab82",
"state": "published",
"created": "2020-01-04T11:57:54Z",
"removed": "2020-01-05T10:26:06Z",
"deprecated": "2020-01-05T10:03:43Z",
"version": 1
}
],
"published_revision": 1,
"version": 5,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "9804ea8e-7ae7-492f-b554-324079f849b0",
"access_rights": {
"id": "ac1f58ad-1d8d-4c5a-b5b6-8a6a66922519",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "1bd72f05-9fad-4b0b-b36a-7c5f23a08949",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "ae647ae7-306e-4def-937a-253681830213",
"name": "Heikki Keskustalo",
"email": "<hidden>"
},
"organization": {
"id": "a5628715-2646-41b9-aaef-bf1d7d09d906",
"pref_label": {
"en": "University of Tampere"
},
"homepage": {
"url": "http://www.uta.fi/english/"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Computer corpora in Finnish, Swedish and English languages (newspaper texts), with requests and relevance information used in information retrieval evaluation. \n\nAbout 142.2, 42.5, and 251 million word tokens respectively; or 1088MB, 281 MB, and 1530 MB respectively.\n\nlog\n25.11.2018 link to islrn.org/resources/435-757-708-421-8 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
},
{
"id": "ec748146-3403-4a7f-adfe-bdbb1b889372",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
},
{
"id": "3c34537c-26bf-4c12-95b6-a3a84aad11c6",
"url": "http://lexvo.org/id/iso639-3/swe",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Swedish",
"fi": "ruotsi",
"sv": "svenska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141248737252",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730175",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Corpora of Newspaper Texts"
},
"created": "2018-06-19T12:42:31Z",
"modified": "2020-03-17T00:00:00Z",
"dataset_versions": [
{
"id": "9804ea8e-7ae7-492f-b554-324079f849b0",
"title": {
"en": "Corpora of Newspaper Texts"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730175",
"state": "published",
"created": "2018-06-19T12:42:31Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "dea5f159-03a2-483b-a4d3-eee05e4058ac",
"access_rights": {
"id": "ed7745bd-57c9-47cc-9e1f-293b8612e9d1",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "d311756e-5346-4641-b03b-c23e6fa28bb8",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "f722b946-d977-4b77-bced-dbc15f8a4ae1",
"name": "Terho Joutsen",
"email": "<hidden>"
},
"organization": {
"id": "baa065c4-8e64-47e3-a776-143eeda75ced",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Corpus of Spoken Modern French, transcriptions included.\n\nlog\n25.11.2018 link islrn.org/resources/802-128-132-924-0 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-17",
"keyword": [],
"language": [
{
"id": "5068bdfc-8176-4b93-b23b-c94ba4fe21de",
"url": "http://lexvo.org/id/iso639-3/fra",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "French",
"fi": "ranska",
"sv": "franska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141325264280",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730107",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Corpus of Spoken Modern French"
},
"created": "2018-06-19T12:42:58Z",
"modified": "2020-03-17T00:00:00Z",
"dataset_versions": [
{
"id": "dea5f159-03a2-483b-a4d3-eee05e4058ac",
"title": {
"en": "Corpus of Spoken Modern French"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730107",
"state": "published",
"created": "2018-06-19T12:42:58Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "92285d4e-9357-43e1-a1da-59383603d734",
"access_rights": {
"id": "8c8e176d-0576-496f-b6bb-5db3f60ee300",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "f6635b9f-358f-4db3-ad54-712b0527d94d",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "a323d2c3-f66b-4c5a-8da8-739c01923acb",
"name": "Matti Rahkonen",
"email": "<hidden>"
},
"organization": {
"id": "4a681e14-cc4f-4a39-a13a-092f9254ffe6",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Three corpora of popular Swedish language textbooks (Toppen, Nya vindar (1980s), and Medvind (1991-93)); morphological and syntactic tagging.\n\nlog\n25.11.2018 link islrn.org/resources/757-145-024-313-2 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-17",
"keyword": [],
"language": [
{
"id": "3c34537c-26bf-4c12-95b6-a3a84aad11c6",
"url": "http://lexvo.org/id/iso639-3/swe",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Swedish",
"fi": "ruotsi",
"sv": "svenska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141443759603",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730154",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Corpora of Swedish Language Textbooks"
},
"created": "2018-06-19T12:42:52Z",
"modified": "2020-03-17T00:00:00Z",
"dataset_versions": [
{
"id": "92285d4e-9357-43e1-a1da-59383603d734",
"title": {
"en": "Corpora of Swedish Language Textbooks"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730154",
"state": "published",
"created": "2018-06-19T12:42:52Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "855cd4dc-8b9d-46c0-99ae-083ab288df98",
"access_rights": {
"id": "e1d137fe-7053-45f2-a929-27bd38dbdace",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "36d88fe1-72ee-484a-8abc-38fab820d9db",
"roles": [
"creator",
"curator"
],
"organization": {
"id": "b66e864f-c44f-4856-9d79-a36eeac1c76a",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
},
{
"id": "8ede9e4d-a84a-4d00-be8e-e6495aeace53",
"roles": [
"curator"
],
"person": {
"id": "27fc49df-0760-40de-aa30-d3878ed8042c",
"name": "Leila Kääntä",
"email": "<hidden>"
},
"organization": {
"id": "b66e864f-c44f-4856-9d79-a36eeac1c76a",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Fan fiction texts (e.g. on Xena, Final Fantasy, Buffy, Angel, Discworld, Harry Potter, LOTR) written by Finns, saved as .html and .txt files. \n\nlog\n25.11.2018 link http://islrn.org/resources/931-771-497-304-1 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-17",
"keyword": [],
"language": [
{
"id": "ec748146-3403-4a7f-adfe-bdbb1b889372",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141350632661",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730108",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Fan Fiction Corpus"
},
"created": "2018-06-19T12:40:16Z",
"modified": "2020-03-17T00:00:00Z",
"dataset_versions": [
{
"id": "855cd4dc-8b9d-46c0-99ae-083ab288df98",
"title": {
"en": "Fan Fiction Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730108",
"state": "published",
"created": "2018-06-19T12:40:16Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "249178fe-79f2-4c2e-86ad-4ef5c08616c6",
"access_rights": {
"id": "2b16f530-cc72-40fe-b575-e4661ff4ea4b",
"license": [
{
"id": "d11e207f-2242-448c-86dd-47b1920bdbd8",
"custom_url": "https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaRes",
"title": {
"en": "CLARIN RES (Restricted) End-User License 1.0",
"und": "CLARIN RES (Restricted) End-User License 1.0"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "0f12cbe9-7fc0-4ceb-aeb3-ff4d285d53e3",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "848949a0-5750-4a64-aa3e-aa5112a95093",
"name": "Jussi Niemi",
"email": "<hidden>"
},
"organization": {
"id": "b39f47a5-7eac-4c39-b0f7-fefc1c9ddd06",
"pref_label": {
"en": "University of Eastern Finland"
},
"homepage": {
"url": "http://www.uef.fi/uef/english"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Computer corpus of Finnish telegraphese language (with English interlinears and translation).\n\nThe Finnish Telegraphese Corpus is a product of a cross-linguistic study of telegraphic language produced by normal adult subjects (university students) to describe a set of states of affairs. The responses were gathered in written questionnaire format, and the states of affairs to be described were held semantically identical across the languages studied. A selection of published studies of the project is listed below.\n\nTesak, Jürgen, Jussi Niemi & Päivi Koivuselkä-Sallinen: Telegraphese and ellipsis in German and Finnish: A comparison. In: C. Mair & M. Markus (eds.), New Departures in Contrastive Linguistics. Innsbrucker Beiträge zur Kulturwissenschaft, Anglistische Reihe Band 5. Innsbruck 1992. Pp. 75-83. \nKoivuselkä-Sallinen, Päivi, Jussi Niemi & Jürgen Tesak (1993): Word Order in Simple Structures in Finnish and German. In: A. Crochetiere, J.-C. Boulanger & C. Ouellon (eds.), Actes du Xve Congres International des Linguistes, Quebec, Universite Laval, 9-14 aout 1992/Proceedings of the Xvth International Congress of Linguists, Quebec, Universite Laval, 9-14 August 1992. Sainte-Foy: PU Laval, 1993, Vol. III, pp. 489-492.\nTesak, Jürgen, Elisabeth Ahlsén, Gábor Györi, Päivi Koivuselkä-Sallinen, Jussi Niemi & Livia Tonelli: Patterns of ellipsis in telegraphese: A study of six languages. Folia Linguistica 24: 297-316 (1995).\nNiemi, Jussi, Jürgen Tesak & Päivi Koivuselkä-Sallinen: Telegraphic style and agrammatism in Finnish and German. In: L. Heltoft & H. Haberland (eds.), Proceedings of the Thirteenth Scandinavian Conference of Linguistics. Department of Language and Culture, Roskilde University (1996). Pp. 483 - 493.\nTesak, Jürgen & Jussi Niemi: Telegraphese and agrammatism: A cross-linguistic study. Aphasiology 11: 145-155 (1997).\n\nAbout 3000 word tokens in total, about 4 person months.\n\nlog\n25.11.2018 link http://islrn.org/resources/907-588-147-641-6 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-17",
"keyword": [],
"language": [
{
"id": "c757dc29-552d-48b2-9efc-4d53b939a7ef",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
},
{
"id": "ec748146-3403-4a7f-adfe-bdbb1b889372",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141419272291",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730125",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Finnish Telegraphese Corpus"
},
"created": "2018-06-19T12:41:53Z",
"modified": "2020-03-17T00:00:00Z",
"dataset_versions": [
{
"id": "249178fe-79f2-4c2e-86ad-4ef5c08616c6",
"title": {
"en": "Finnish Telegraphese Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730125",
"state": "published",
"created": "2018-06-19T12:41:53Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "1b3db195-182b-4801-929c-c6331f6a415d",
"access_rights": {
"id": "de261f4a-6d11-4e24-9804-0a89b26ee923",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "10d7cdad-e23a-451b-8c7f-7a807ae14d4f",
"roles": [
"creator",
"curator"
],
"organization": {
"id": "65bbb532-034e-48e5-8ba6-09c1b45c6b68",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
},
{
"id": "b5ff5b24-8d13-4a6c-a446-043fc329b27f",
"roles": [
"curator"
],
"person": {
"id": "677db27b-1157-459a-bced-94dda6d29e43",
"name": "Leila Kääntä",
"email": "<hidden>"
},
"organization": {
"id": "65bbb532-034e-48e5-8ba6-09c1b45c6b68",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Videotaped English lessons from lower secondary and upper secondary (gymnasium) schools in .avi and .mpg2 formats.\n\nlog \n25.11.2018 link islrn.org/resources/717-352-884-538-9 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-17",
"keyword": [],
"language": [
{
"id": "ec748146-3403-4a7f-adfe-bdbb1b889372",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141554452099",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-2014073010",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "English as a Foreign Language Corpus"
},
"created": "2018-06-19T12:43:23Z",
"modified": "2020-03-17T00:00:00Z",
"dataset_versions": [
{
"id": "1b3db195-182b-4801-929c-c6331f6a415d",
"title": {
"en": "English as a Foreign Language Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-2014073010",
"state": "published",
"created": "2018-06-19T12:43:23Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "646148d3-3365-4859-9fe4-2ac297c340c2",
"access_rights": {
"id": "31de8493-c9bf-43a2-884d-bfe8501babac",
"license": [
{
"id": "35b18e72-3819-4ec6-a1be-14624f29d968",
"custom_url": "https://creativecommons.org/licenses/by/4.0/",
"title": {
"en": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
"fi": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)",
"und": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC-BY-4.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
"fi": "Creative Commons Nimeä 4.0 Kansainvälinen (CC BY 4.0)"
}
}
],
"access_type": {
"id": "d01ac02c-fc70-4c68-9434-8383cb693ff0",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "3e14dca4-b0a0-4671-b95c-679dcf2ed02d",
"roles": [
"creator",
"curator"
],
"person": {
"id": "9da1c28e-1b02-4d1d-b77a-a79b849801c6",
"name": "Olesya Khanina",
"email": "<hidden>"
},
"organization": {
"id": "403afa7c-4b95-4f94-a915-9509764b0828",
"pref_label": {
"en": "Russian Academy of Sciences"
},
"homepage": {
"url": "https://iling-ran.ru/web/en"
},
"email": "<hidden>"
}
},
{
"id": "4a55246e-6130-44a7-9632-0d1a7ed2cd60",
"roles": [
"creator"
],
"person": {
"id": "187bb855-ab31-40b4-9614-8ac782d1f5fb",
"name": "Andrey Shluinsky",
"email": "<hidden>"
},
"organization": {
"id": "403afa7c-4b95-4f94-a915-9509764b0828",
"pref_label": {
"en": "Russian Academy of Sciences"
},
"homepage": {
"url": "https://iling-ran.ru/web/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The digital corpus of Forest and Tundra Enets has been created as a part of a documentation effort and contains examples of natural oral speech collected at the Tajmyr peninsula, Russia (everyday stories, traditional stories and tales, conversations, interviews, procedures/instructions). All texts are synchronized with audio (and for one third of the collection with video), transcribed in phonological transcription, translated into English and Russian, and glossed. The corpus contains modern recordings from the current generation of the Enets born in the 1940s–1960s and also digitized legacy recordings from the previous generation of their late parents born in the 1910s–1930s. A total number of 50 speakers are represented (36 speakers of Forest Enets and 14 speakers of Tundra Enets).\n\nThe corpus will be made available via the Language Bank of Finland.\n\nThe corpus was created by Olesya Khanina and Andrey Shluinsky (Institute of Linguistics, Russian Academy of Sciences). \nThe original legacy recordings were performed by linguists Kazimir Labanauskas, Eugen Helimski, Irina Sorokina, Darja Bolina, by a musicologist Oksana Dobzhanskaja, by a Forest Enets journalist of the local radio Nina Bolina, and by Forest Enets Viktor Pal’chin and his wife Marina Pal’china. The legacy recordings were digitized by Olesya Khanina and Andrey Shluinsky. The modern texts were recorded, transcribed, and translated by linguists Olesya Khanina and Andrey Shluinsky, with assistance of Maria Ovsjannikova, Natalia Stoynova, and Sergey Trubetskoy. The glossing of the corpus was realized by Olesya Khanina and Andrey Shluinsky.\n\nThe recording, transcription, and translation of the corpus was supported by the Hans Rausing Endangered Languages Project (London, SOAS). The glossing of the corpus was supported by the Max Planck Institute for Evolutionary Anthropology (Leipzig, Germany)."
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-16",
"keyword": [],
"language": [
{
"id": "95e2fd78-f666-44e1-91a7-c5c1ba6be48d",
"url": "http://lexvo.org/id/iso639-3/enf",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Forest Enets"
}
},
{
"id": "6e9da411-073d-40cb-bf70-dcf61b84a350",
"url": "http://lexvo.org/id/iso639-3/enh",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Tundra Enets"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2020031621",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [
{
"start_date": "1970-01-01",
"end_date": "2000-12-31"
}
],
"theme": [],
"title": {
"en": "Corpus of Forest and Tundra Enets",
"fi": "Metsä- ja tundraenetsin korpus"
},
"created": "2020-03-17T02:23:29Z",
"modified": "2020-03-16T00:00:00Z",
"dataset_versions": [
{
"id": "646148d3-3365-4859-9fe4-2ac297c340c2",
"title": {
"en": "Corpus of Forest and Tundra Enets",
"fi": "Metsä- ja tundraenetsin korpus"
},
"persistent_identifier": "urn:nbn:fi:lb-2020031621",
"state": "published",
"created": "2020-03-17T02:23:29Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
},
{
"id": "92e6a3f4-4f77-4029-8e97-5822a5f5800e",
"access_rights": {
"id": "613e599d-efc5-4503-865e-906935196c51",
"license": [
{
"id": "6bb760b4-08c4-43fe-88dd-eb27abe9f760",
"title": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken",
"und": "Neuvottelut kesken"
},
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/undernegotiation",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Under negotiation",
"fi": "Neuvottelut kesken"
}
}
],
"access_type": {
"id": "729ffd9f-6d7a-40e9-aa97-a363a16fd113",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "96821a6a-9522-4334-b30b-094040d51f60",
"roles": [
"creator",
"publisher",
"curator"
],
"person": {
"id": "5840d738-c9a7-406d-ad37-5d2ef82a92ba",
"name": "Matti Rahkonen",
"email": "<hidden>"
},
"organization": {
"id": "92c24817-d26e-468c-9c7c-079bb50dde5c",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto"
},
"homepage": {
"url": "https://www.jyu.fi/en"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "A longitudinal corpus Swedish language compositions written by Finnish-speaking upper secondary school (gymnasium) students in 1991-93; parts of speech tagged.\n\n100 students; 8 compositions from each, a total of 150 000 words.\n\n\nlog\n25.11.2018 link http://islrn.org/resources/248-895-085-557-0 removed"
},
"field_of_science": [
{
"id": "62b43705-b725-40cd-aa4f-f5b1465678e1",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"issued": "2020-03-16",
"keyword": [],
"language": [
{
"id": "3c34537c-26bf-4c12-95b6-a3a84aad11c6",
"url": "http://lexvo.org/id/iso639-3/swe",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Swedish",
"fi": "ruotsi",
"sv": "svenska"
}
}
],
"metadata_owner": {
"id": "541fdb9b-f8a8-4c75-a75d-13f413454d72",
"organization": "fairdata.fi"
},
"other_identifiers": [
{
"notation": "urn:nbn:fi:csc-kata20170221141304974541",
"identifier_type": {
"id": "9ea2eac4-87b3-477d-9ac6-107eb654ab2c",
"url": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
"pref_label": {
"en": "Uniform Resource Name (URN)"
}
},
"metax_ids": []
}
],
"persistent_identifier": "urn:nbn:fi:lb-20140730180",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Longi Corpus"
},
"created": "2018-06-19T12:42:33Z",
"modified": "2020-03-16T00:00:00Z",
"dataset_versions": [
{
"id": "92e6a3f4-4f77-4029-8e97-5822a5f5800e",
"title": {
"en": "Longi Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730180",
"state": "published",
"created": "2018-06-19T12:42:33Z",
"version": 1
}
],
"published_revision": 2,
"version": 1,
"api_version": 1,
"metadata_repository": "Fairdata"
}
]
}