{
  "_id": "6a1d38961d7bb097a0a3df31",
  "Package": "klsh",
  "Type": "Package",
  "Title": "Blocking for Record Linkage",
  "Version": "0.1.0",
  "Authors@R": "person(\"Rebecca\", \"Steorts\", email = \"beka@stat.duke.edu\",\nrole = c(\"aut\", \"cre\"))",
  "VignetteBuilder": "knitr",
  "Description": "An implementation of the blocking algorithm KLSH in\nSteorts, Ventura, Sadinle, Fienberg (2014)\n<DOI:10.1007/978-3-319-11257-2_20>, which is a k-means variant\nof locality sensitive hashing. The method is illustrated with\nexamples and a vignette.",
  "Encoding": "UTF-8",
  "License": "GPL-3",
  "RoxygenNote": "7.1.1.9000",
  "Config/pak/sysreqs": "libicu-dev",
  "Repository": "https://cleanzr.r-universe.dev",
  "Date/Publication": "2020-10-14 10:05:44 UTC",
  "RemoteUrl": "https://github.com/cleanzr/klsh",
  "RemoteRef": "HEAD",
  "RemoteSha": "de9921997ec1c86044a8648899fb883c352a78b1",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-06-01 07:40:59 UTC",
    "User": "root"
  },
  "Author": "Rebecca Steorts [aut, cre]",
  "Maintainer": "Rebecca Steorts <beka@stat.duke.edu>",
  "MD5sum": "1a5fd0c83b248ad5e2963a7a761b53fa",
  "_user": "cleanzr",
  "_type": "src",
  "_file": "klsh_0.1.0.tar.gz",
  "_fileid": "9cd4e4f5aed43646b16445a253ba98a40198752c4321d27db5c1e9de85a3f08f",
  "_filesize": 197508,
  "_sha256": "9cd4e4f5aed43646b16445a253ba98a40198752c4321d27db5c1e9de85a3f08f",
  "_created": "2026-06-01T07:40:59.000Z",
  "_published": "2026-06-01T07:45:26.169Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 78807344351,
      "time": 109,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "7325910804"
    },
    {
      "job": 78807344344,
      "time": 112,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7325911547"
    },
    {
      "job": 78807344368,
      "time": 106,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "7325948387"
    },
    {
      "job": 78807344316,
      "time": 81,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7325925633"
    },
    {
      "job": 78806858689,
      "time": 208,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7325877477"
    },
    {
      "job": 78807344323,
      "time": 103,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7325909030"
    },
    {
      "job": 78807344397,
      "time": 95,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "7325906577"
    },
    {
      "job": 78807344356,
      "time": 95,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "7325906591"
    },
    {
      "job": 78807344345,
      "time": 74,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7325899992"
    }
  ],
  "_buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/cleanzr/klsh",
  "_commit": {
    "id": "de9921997ec1c86044a8648899fb883c352a78b1",
    "author": "Rebecca Steorts <beka@stat.duke.edu>",
    "committer": "Rebecca Steorts <beka@stat.duke.edu>",
    "message": "prepare for cran.\n",
    "time": 1602669944
  },
  "_maintainer": {
    "name": "Rebecca Steorts",
    "email": "beka@stat.duke.edu",
    "login": "resteorts",
    "description": "\n    Assistant Professor, Duke University, Department of Statistical Science\n",
    "uuid": 4843162
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.0.2",
      "role": "Depends"
    },
    {
      "package": "blink",
      "role": "Depends"
    },
    {
      "package": "stats",
      "role": "Depends"
    },
    {
      "package": "utils",
      "role": "Depends"
    },
    {
      "package": "plyr",
      "role": "Depends"
    },
    {
      "package": "Rcpp",
      "role": "Imports"
    },
    {
      "package": "stringi",
      "role": "Imports"
    },
    {
      "package": "SnowballC",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "ggplot2",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    }
  ],
  "_owner": "cleanzr",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [],
  "_tags": [],
  "_stars": 0,
  "_contributors": [
    {
      "user": "resteorts",
      "count": 18,
      "uuid": 4843162
    },
    {
      "user": "andeek",
      "count": 2,
      "uuid": 2414427
    }
  ],
  "_userbio": {
    "uuid": 55656800,
    "type": "organization",
    "name": "cleanzr"
  },
  "_downloads": {
    "count": 205,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/klsh"
  },
  "_devurl": "https://github.com/cleanzr/klsh",
  "_searchresults": 3,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/klsh.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/cleanzr/klsh",
  "_realowner": "cleanzr",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2020-10-22"
    }
  ],
  "_exports": [
    "bag_of_word_ify",
    "bag_signatures",
    "block.ids.from.blocking",
    "calc_idf",
    "confusion.from.blocking",
    "klsh",
    "reduction.ratio",
    "reduction.ratio.from.blocking",
    "rproject_bags",
    "sacks_of_bags_of_words",
    "tokenify"
  ],
  "_help": [
    {
      "page": "bag_of_word_ify",
      "title": "Function to convert a record into a bag of tokens with a fieldwise flag",
      "topics": [
        "bag_of_word_ify"
      ]
    },
    {
      "page": "bag_signatures",
      "title": "Function that reduces a bag of words into a signature matrix using multiple random projections",
      "topics": [
        "bag_signatures"
      ]
    },
    {
      "page": "block.ids.from.blocking",
      "title": "Returns the block ids associated with a blocking method.",
      "topics": [
        "block.ids.from.blocking"
      ]
    },
    {
      "page": "calc_idf",
      "title": "Function to calculate the inverse document frequency given a shingled bag of words",
      "topics": [
        "calc_idf"
      ]
    },
    {
      "page": "confusion.from.blocking",
      "title": "Perform evaluations (recall) for blocking.",
      "topics": [
        "confusion.from.blocking"
      ]
    },
    {
      "page": "klsh",
      "title": "Function that reduces a bag of words into a signature matrix using multiple random projections",
      "topics": [
        "klsh"
      ]
    },
    {
      "page": "reduction.ratio",
      "title": "Returns the reduction ratio associated with a blocking method",
      "topics": [
        "reduction.ratio"
      ]
    },
    {
      "page": "reduction.ratio.from.blocking",
      "title": "Returns the reduction ratio associated with a blocking method",
      "topics": [
        "reduction.ratio.from.blocking"
      ]
    },
    {
      "page": "rproject_bags",
      "title": "Function that generates unit random vectors and takes (weighted) projections onto the random unit vectors given a bag of words",
      "topics": [
        "rproject_bags"
      ]
    },
    {
      "page": "sacks_of_bags_of_words",
      "title": "Function to convert all records into a bag of tokens",
      "topics": [
        "sacks_of_bags_of_words"
      ]
    },
    {
      "page": "tokenify",
      "title": "Function to token a string into its k components",
      "topics": [
        "tokenify"
      ]
    }
  ],
  "_readme": "https://github.com/cleanzr/klsh/raw/HEAD/README.md",
  "_rundeps": [
    "blink",
    "plyr",
    "Rcpp",
    "SnowballC",
    "stringdist",
    "stringi"
  ],
  "_vignettes": [
    {
      "source": "klsh.Rmd",
      "filename": "klsh.html",
      "title": "klsh",
      "author": "Rebecca C. Steorts",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Understanding the RLdata500 dataset",
        "KLSH applied to RLdata500"
      ],
      "created": "2020-10-08 15:35:54",
      "modified": "2020-10-12 22:02:13",
      "commits": 3
    }
  ],
  "_score": 3.6989700043360187,
  "_indexed": true,
  "_nocasepkg": "klsh",
  "_universes": [
    "cleanzr",
    "resteorts"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-06-01T07:42:47.000Z",
      "distro": "noble",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "c3c22910aadaf0a19b9f216f3ecfc5ad651dc8ca28a6638c936a8f666f159725",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-06-01T07:42:53.000Z",
      "distro": "noble",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "a97a2026d94c9d71c5525b5fc97c579cc8813d63605fd0861c1a65bcb79bdde6",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-06-01T07:44:58.000Z",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "99848b42dfc7bc7f277336aaf634646f0708db83eab78d49ec3dcfc322ed2325",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-06-01T07:43:42.000Z",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "97cffa3bb5ce8e0dde19bb84da7788f1c9606c7122cc5006f84da72e293bba8d",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.1.0",
      "date": "2026-06-01T07:42:57.000Z",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "3794ae1467b11b136b2507adef523cba82fddaaa9c8218cd361bd95e160ef164",
      "status": "success",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-06-01T07:42:28.000Z",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "d0906e8055eeaa73e4d924c5c0bf449beb7d369a28f9dfd09446f6ca5a21dc72",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-06-01T07:42:28.000Z",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "5634de736668da008ceb0ee3ed7f3c96da1e373bb59b5a14b7f39d8894d7e001",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-06-01T07:42:09.000Z",
      "commit": "de9921997ec1c86044a8648899fb883c352a78b1",
      "fileid": "80b838df2952ef054b256a5528c5e4c779631a0c73e6333591871ba940d6f63c",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cleanzr/actions/runs/26741629624"
    }
  ]
}