{
  "_id": "6a103643acfb0bcc41c99fb5",
  "Package": "tosca",
  "Type": "Package",
  "Title": "Tools for Statistical Content Analysis",
  "Version": "0.3-4",
  "Authors@R": "c(person(\"Lars\", \"Koppers\", email=\"koppers@statistik.tu-dortmund.de\", role=c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-1642-9616\")),\nperson(\"Jonas\", \"Rieger\", email=\"jonas.rieger@tu-dortmund.de\", role=c(\"aut\"), comment = c(ORCID = \"0000-0002-0007-4478\")),\nperson(\"Karin\", \"Boczek\", email=\"karin.boczek@tu-dortmund.de\", role=c(\"ctb\"), comment = c(ORCID = \"0000-0003-1516-4094\")),\nperson(\"Gerret\", \"von Nordheim\", email=\"gerret.vonnordheim@tu-dortmund.de\", role=c(\"ctb\"), comment = c(ORCID = \"0000-0001-7553-3838\")))",
  "Description": "A framework for statistical analysis in content analysis.\nIn addition to a pipeline for preprocessing text corpora and\nlinking to the latent Dirichlet allocation from the 'lda'\npackage, plots are offered for the descriptive analysis of text\ncorpora and topic models. In addition, an implementation of\nChang's intruder words and intruder topics is provided. Sample\ndata for the vignette is included in the toscaData package,\nwhich is available on gitHub:\n<https://github.com/Docma-TU/toscaData>.",
  "URL": "https://github.com/Docma-TU/tosca,\nhttps://doi.org/10.5281/zenodo.3591068",
  "License": "GPL (>= 2)",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.2",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "libicu-dev libxml2-dev libssl-dev",
  "Repository": "https://docma-tu.r-universe.dev",
  "Date/Publication": "2025-04-22 05:30:18 UTC",
  "RemoteUrl": "https://github.com/docma-tu/tosca",
  "RemoteRef": "HEAD",
  "RemoteSha": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-12 06:57:45 UTC",
    "User": "root"
  },
  "Author": "Lars Koppers [aut, cre] (ORCID:\n<https://orcid.org/0000-0002-1642-9616>),\nJonas Rieger [aut] (ORCID: <https://orcid.org/0000-0002-0007-4478>),\nKarin Boczek [ctb] (ORCID: <https://orcid.org/0000-0003-1516-4094>),\nGerret von Nordheim [ctb] (ORCID:\n<https://orcid.org/0000-0001-7553-3838>)",
  "Maintainer": "Lars Koppers <koppers@statistik.tu-dortmund.de>",
  "MD5sum": "f3693cf2fc8cf66be1c937092ea1fe3c",
  "_user": "docma-tu",
  "_type": "src",
  "_file": "tosca_0.3-4.tar.gz",
  "_fileid": "426fc36d95e979857fa7daa090f54a24b32380122910db8da33cf610c98a1645",
  "_filesize": 821161,
  "_sha256": "426fc36d95e979857fa7daa090f54a24b32380122910db8da33cf610c98a1645",
  "_created": "2026-05-12T06:57:45.000Z",
  "_published": "2026-05-22T10:56:03.059Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77365645575,
      "time": 194,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6937686523"
    },
    {
      "job": 77365645859,
      "time": 195,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6937688122"
    },
    {
      "job": 77365645860,
      "time": 193,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6937675620"
    },
    {
      "job": 77365645714,
      "time": 221,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6937677659"
    },
    {
      "job": 77365645515,
      "time": 227,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6937630547"
    },
    {
      "job": 77365645380,
      "time": 142,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7158818004"
    },
    {
      "job": 77365645897,
      "time": 195,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6937687619"
    },
    {
      "job": 77365645875,
      "time": 140,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6937671358"
    },
    {
      "job": 77365645978,
      "time": 130,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6937668677"
    }
  ],
  "_buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/docma-tu/tosca",
  "_commit": {
    "id": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
    "author": "lkoppers <lars.koppers@tu-dortmund.de>",
    "committer": "lkoppers <lars.koppers@tu-dortmund.de>",
    "message": "Update global.R\n",
    "time": 1745299818
  },
  "_maintainer": {
    "name": "Lars Koppers",
    "email": "koppers@statistik.tu-dortmund.de",
    "orcid": "0000-0002-1642-9616"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "tm",
      "version": ">= 0.7-5",
      "role": "Imports"
    },
    {
      "package": "lda",
      "version": ">= 1.4.2",
      "role": "Imports"
    },
    {
      "package": "quanteda",
      "version": ">= 1.4.0",
      "role": "Imports"
    },
    {
      "package": "lubridate",
      "version": ">= 1.7.3",
      "role": "Imports"
    },
    {
      "package": "htmltools",
      "version": ">= 0.3.6",
      "role": "Imports"
    },
    {
      "package": "RColorBrewer",
      "version": ">= 1.1-2",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "version": ">= 1.3.1",
      "role": "Imports"
    },
    {
      "package": "WikipediR",
      "version": ">= 1.5.0",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "version": ">= 1.11.4",
      "role": "Imports"
    },
    {
      "package": "toscaData",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 2.0.0",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "version": ">= 1.20",
      "role": "Suggests"
    },
    {
      "package": "devtools",
      "version": ">= 1.13",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "version": ">= 1.9",
      "role": "Suggests"
    }
  ],
  "_owner": "docma-tu",
  "_selfowned": true,
  "_usedby": 1,
  "_updates": [],
  "_tags": [],
  "_stars": 17,
  "_contributors": [
    {
      "user": "jonasrieger",
      "count": 72,
      "uuid": 23215382
    },
    {
      "user": "lkoppers",
      "count": 61,
      "uuid": 17726209
    },
    {
      "user": "kbenoit",
      "count": 4,
      "uuid": 2182246
    },
    {
      "user": "karinboczek",
      "count": 1,
      "uuid": 25984223
    }
  ],
  "_userbio": {
    "uuid": 17725906,
    "type": "organization",
    "name": "DoCMA"
  },
  "_downloads": {
    "count": 293,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/tosca"
  },
  "_devurl": "https://github.com/docma-tu/tosca",
  "_searchresults": 104,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/readme.html",
    "extra/readme.md",
    "extra/tosca.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/docma-tu/tosca",
  "_realowner": "docma-tu",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1-0",
      "date": "2018-08-31"
    },
    {
      "version": "0.1-1",
      "date": "2018-09-03"
    },
    {
      "version": "0.1-2",
      "date": "2018-09-13"
    },
    {
      "version": "0.1-3",
      "date": "2019-01-17"
    },
    {
      "version": "0.1-4",
      "date": "2019-03-18"
    },
    {
      "version": "0.2-0",
      "date": "2020-03-10"
    },
    {
      "version": "0.3-1",
      "date": "2021-04-20"
    },
    {
      "version": "0.3-2",
      "date": "2021-10-28"
    },
    {
      "version": "0.3-4",
      "date": "2025-04-22"
    }
  ],
  "_exports": [
    "as.corpus.textmeta",
    "as.meta",
    "as.textmeta.corpus",
    "cleanTexts",
    "clusterTopics",
    "deleteAndRenameDuplicates",
    "duplist",
    "filterCount",
    "filterDate",
    "filterID",
    "filterWord",
    "importance",
    "intruderTopics",
    "intruderWords",
    "is.duplist",
    "is.textmeta",
    "is.textmeta_tidy",
    "LDAgen",
    "LDAprep",
    "makeWordlist",
    "mergeLDA",
    "mergeTextmeta",
    "plotArea",
    "plotFreq",
    "plotHeat",
    "plotScot",
    "plotTopic",
    "plotTopicWord",
    "plotWordpt",
    "plotWordSub",
    "precision",
    "readTextmeta",
    "readTextmeta.df",
    "readWhatsApp",
    "readWiki",
    "readWikinews",
    "recall",
    "removeHTML",
    "removeUmlauts",
    "removeXML",
    "sampling",
    "showMeta",
    "showTexts",
    "textmeta",
    "tidy.textmeta",
    "topicCoherence",
    "topicsInText",
    "topTexts",
    "topWords",
    "vprecision",
    "vrecall"
  ],
  "_help": [
    {
      "page": "as.corpus.textmeta",
      "title": "Transform textmeta to corpus",
      "topics": [
        "as.corpus.textmeta"
      ]
    },
    {
      "page": "as.meta",
      "title": "\"meta\" Component of \"textmeta\"-Objects",
      "topics": [
        "as.meta"
      ]
    },
    {
      "page": "as.textmeta.corpus",
      "title": "Transform corpus to textmeta",
      "topics": [
        "as.textmeta.corpus"
      ]
    },
    {
      "page": "cleanTexts",
      "title": "Data Preprocessing",
      "topics": [
        "cleanTexts"
      ]
    },
    {
      "page": "clusterTopics",
      "title": "Cluster Analysis",
      "topics": [
        "clusterTopics"
      ]
    },
    {
      "page": "deleteAndRenameDuplicates",
      "title": "Deletes and Renames Articles with the same ID",
      "topics": [
        "deleteAndRenameDuplicates"
      ]
    },
    {
      "page": "duplist",
      "title": "Creating List of Duplicates",
      "topics": [
        "duplist",
        "is.duplist",
        "print.duplist",
        "summary.duplist"
      ]
    },
    {
      "page": "filterCount",
      "title": "Subcorpus With Count Filter",
      "topics": [
        "filterCount",
        "filterCount.default",
        "filterCount.textmeta"
      ]
    },
    {
      "page": "filterDate",
      "title": "Subcorpus With Date Filter",
      "topics": [
        "filterDate",
        "filterDate.default",
        "filterDate.textmeta"
      ]
    },
    {
      "page": "filterID",
      "title": "Subcorpus With ID Filter",
      "topics": [
        "filterID",
        "filterID.default",
        "filterID.textmeta"
      ]
    },
    {
      "page": "filterWord",
      "title": "Subcorpus With Word Filter",
      "topics": [
        "filterWord",
        "filterWord.default",
        "filterWord.textmeta"
      ]
    },
    {
      "page": "intruderTopics",
      "title": "Function to validate the fit of the LDA model",
      "topics": [
        "intruderTopics"
      ]
    },
    {
      "page": "intruderWords",
      "title": "Function to validate the fit of the LDA model",
      "topics": [
        "intruderWords"
      ]
    },
    {
      "page": "LDAgen",
      "title": "Function to fit LDA model",
      "topics": [
        "LDAgen"
      ]
    },
    {
      "page": "LDAprep",
      "title": "Create Lda-ready Dataset",
      "topics": [
        "LDAprep"
      ]
    },
    {
      "page": "makeWordlist",
      "title": "Counts Words in Text Corpora",
      "topics": [
        "makeWordlist"
      ]
    },
    {
      "page": "mergeLDA",
      "title": "Preparation of Different LDAs For Clustering",
      "topics": [
        "mergeLDA"
      ]
    },
    {
      "page": "mergeTextmeta",
      "title": "Merge Textmeta Objects",
      "topics": [
        "mergeTextmeta"
      ]
    },
    {
      "page": "plotArea",
      "title": "Plotting topics over time as stacked areas below plotted lines.",
      "topics": [
        "plotArea"
      ]
    },
    {
      "page": "plotFreq",
      "title": "Plotting Counts of specified Wordgroups over Time (relative to Corpus)",
      "topics": [
        "plotFreq"
      ]
    },
    {
      "page": "plotHeat",
      "title": "Plotting Topics over Time relative to Corpus",
      "topics": [
        "plotHeat"
      ]
    },
    {
      "page": "plotScot",
      "title": "Plots Counts of Documents or Words over Time (relative to Corpus)",
      "topics": [
        "plotScot"
      ]
    },
    {
      "page": "plotTopic",
      "title": "Plotting Counts of Topics over Time (Relative to Corpus)",
      "topics": [
        "plotTopic"
      ]
    },
    {
      "page": "plotTopicWord",
      "title": "Plotting Counts of Topics-Words-Combination over Time (Relative to Words)",
      "topics": [
        "plotTopicWord"
      ]
    },
    {
      "page": "plotWordpt",
      "title": "Plots Counts of Topics-Words-Combination over Time (Relative to Topics)",
      "topics": [
        "plotWordpt"
      ]
    },
    {
      "page": "plotWordSub",
      "title": "Plotting Counts/Proportion of Words/Docs in LDA-generated Topic-Subcorpora over Time",
      "topics": [
        "plotWordSub"
      ]
    },
    {
      "page": "precisionRecall",
      "title": "Precision and Recall",
      "topics": [
        "precision",
        "recall",
        "vprecision",
        "vrecall"
      ]
    },
    {
      "page": "readTextmeta",
      "title": "Read Corpora as CSV",
      "topics": [
        "readTextmeta",
        "readTextmeta.df"
      ]
    },
    {
      "page": "readWhatsApp",
      "title": "Read WhatsApp files",
      "topics": [
        "readWhatsApp"
      ]
    },
    {
      "page": "readWiki",
      "title": "Read Pages from Wikipedia",
      "topics": [
        "readWiki"
      ]
    },
    {
      "page": "readWikinews",
      "title": "Read files from Wikinews",
      "topics": [
        "readWikinews"
      ]
    },
    {
      "page": "removeXML",
      "title": "Removes XML/HTML Tags and Umlauts",
      "topics": [
        "removeHTML",
        "removeUmlauts",
        "removeXML"
      ]
    },
    {
      "page": "sampling",
      "title": "Sample Texts",
      "topics": [
        "sampling"
      ]
    },
    {
      "page": "showMeta",
      "title": "Export Readable Meta-Data of Articles.",
      "topics": [
        "showMeta"
      ]
    },
    {
      "page": "showTexts",
      "title": "Exports Readable Text Lists",
      "topics": [
        "showTexts"
      ]
    },
    {
      "page": "textmeta",
      "title": "\"textmeta\"-Objects",
      "topics": [
        "is.textmeta",
        "plot.textmeta",
        "print.textmeta",
        "summary.textmeta",
        "textmeta"
      ]
    },
    {
      "page": "tidy.textmeta",
      "title": "Transform textmeta to an object with tidy text data",
      "topics": [
        "is.textmeta_tidy",
        "print.textmeta_tidy",
        "tidy.textmeta"
      ]
    },
    {
      "page": "topicCoherence",
      "title": "Calculating Topic Coherence",
      "topics": [
        "topicCoherence"
      ]
    },
    {
      "page": "topicsInText",
      "title": "Coloring the words of a text corresponding to topic allocation",
      "topics": [
        "topicsInText"
      ]
    },
    {
      "page": "topTexts",
      "title": "Get The IDs Of The Most Representive Texts",
      "topics": [
        "topTexts"
      ]
    },
    {
      "page": "topWords",
      "title": "Top Words per Topic",
      "topics": [
        "importance",
        "topWords"
      ]
    }
  ],
  "_readme": "https://github.com/docma-tu/tosca/raw/HEAD/README.md",
  "_rundeps": [
    "askpass",
    "base64enc",
    "BH",
    "cli",
    "cpp11",
    "curl",
    "data.table",
    "digest",
    "fastmap",
    "fastmatch",
    "generics",
    "glue",
    "htmltools",
    "httr",
    "ISOcodes",
    "jsonlite",
    "lattice",
    "lda",
    "lifecycle",
    "lubridate",
    "magrittr",
    "Matrix",
    "mime",
    "NLP",
    "openssl",
    "quanteda",
    "R6",
    "RColorBrewer",
    "Rcpp",
    "rlang",
    "slam",
    "SnowballC",
    "stopwords",
    "stringi",
    "stringr",
    "sys",
    "timechange",
    "tm",
    "vctrs",
    "WikipediR",
    "xml2",
    "yaml"
  ],
  "_vignettes": [
    {
      "source": "Vignette.Rmd",
      "filename": "Vignette.html",
      "title": "tosca: Tools for Statistical Content Analysis",
      "author": "Lars Koppers, Jonas Rieger, Karin Boczek, Gerret von Nordheim",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Data Preprocessing",
        "Read the Corpus - \\texttt",
        "Remove Umlauts and XML/HTML Tags - \\texttt",
        "Identifying Duplicates - \\texttt",
        "Clean Corpus - \\texttt",
        "Generate Wordlist - \\texttt",
        "Descriptive Analysis",
        "Generic Functions - \\texttt",
        "Visualisation of Corpus over Time - \\texttt",
        "Frequency Analysis - \\texttt",
        "Write CSV Files - \\texttt",
        "Generating Subcorpora",
        "Filter Corpus by Dates - \\texttt",
        "Filter Corpus by Wordcount - \\texttt",
        "Filter Corpus by Words - \\texttt",
        "Latent Dirichlet Allocation",
        "Transform Corpus - \\texttt",
        "Performing LDA - \\texttt",
        "Validation of LDA Results - \\texttt",
        "Clustering of Topics - \\texttt",
        "Visualisation of Topics over Time - \\texttt",
        "Visualisation of Topic Share over Time - \\texttt",
        "Visualisation of Words in Topic over Time - \\texttt",
        "Visualisation of Words in Articles allocated to Topics - \\texttt",
        "Heatmap of Topics over Time including Clustering - \\texttt",
        "Individual Cases Contemplation - \\texttt",
        "Example pipeline",
        "Conclusion"
      ],
      "created": "2018-08-30 09:56:32",
      "modified": "2021-04-18 13:59:21",
      "commits": 9
    }
  ],
  "_score": 6.900694774452399,
  "_indexed": true,
  "_nocasepkg": "tosca",
  "_universes": [
    "docma-tu"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.3-4",
      "date": "2026-05-12T07:00:30.000Z",
      "distro": "noble",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "588d71567f601a6d1323a2bcc7441fe09a998c7f68303cb54b15637e4dea9f69",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.3-4",
      "date": "2026-05-12T07:00:49.000Z",
      "distro": "noble",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "d20648889e3cb3fdd55747b95def0142ca9230d3aac953bc79caf7ea352bbf26",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.3-4",
      "date": "2026-05-12T07:00:05.000Z",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "3f3681a9477690250c002ffdf56a6b16eb989f9582ce819877a9490d2baed869",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.3-4",
      "date": "2026-05-12T06:59:59.000Z",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "27c85544389191d23891f15679328bf59048069ccba600f1545537035e7b76b8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.3-4",
      "date": "2026-05-12T07:00:33.000Z",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "dd6a79dcac61ebb6334b4d0bc09073f85b0c869e81fe2be8b1e5377f7c750d06",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.3-4",
      "date": "2026-05-12T06:59:37.000Z",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "60e3125135ae151b184d66822a726fef5bbde5e308b9322617ccb6d4ebfced51",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.3-4",
      "date": "2026-05-12T06:59:22.000Z",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "54c3a8d288f038a2e3e98f67670286f93d11500919ee4e45f7ffa0c82528e08f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.3-4",
      "date": "2026-05-22T10:55:40.000Z",
      "commit": "1b7601ae4b9552926ca4c4234d2efffc924c8966",
      "fileid": "009ac5e96c7ddc7746cf0cb0c63004e03b4e27472f15d58b62ddfa1993aa6ae2",
      "status": "success",
      "buildurl": "https://github.com/r-universe/docma-tu/actions/runs/25718567997"
    }
  ]
}