Title: | Japanese Utility Functions and Data |
---|---|
Description: | Some data handled by Japanese R users require unique operations and processing, owing to Japanese address, Kanji, and traditional year representations. 'zipangu' transforms these Japan-specific representations into more general ones. |
Authors: | Shinya Uryu [aut, cre] , Hiroaki Yutani [ctb] , Kazuhiro Maeda [ctb], Mao Kobayashi [ctb], Akiru Kato [ctb] |
Maintainer: | Shinya Uryu <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.3.3.9000 |
Built: | 2024-12-07 06:10:25 UTC |
Source: | https://github.com/uribo/zipangu |
convert_jdate(date, legacy = FALSE)
convert_jdate(date, legacy = FALSE)
date |
A character object. |
legacy |
A logical to switch converter. If |
convert_jdate("R3/2/27") #> [1] "2021-02-27" convert_jdate("\u4ee4\u548c2\u5e747\u67086\u65e5") #> [1] "2020-07-06"
convert_jyear(jyear, legacy = FALSE)
convert_jyear(jyear, legacy = FALSE)
jyear |
Japanese imperial year (jyear). Kanji or Roman character |
legacy |
A logical to switch converter. If |
convert_jyear("R1") #> [1] 2019 convert_jyear("Heisei2") #> [1] 1990 convert_jyear("\u5e73\u6210\u5143\u5e74") #> [1] 1989 convert_jyear(c("\u662d\u548c10\u5e74", "\u5e73\u621014\u5e74")) #> [1] 1935 2002 convert_jyear(kansuji2arabic_all("\u5e73\u6210\u4e09\u5e74")) #> [1] 1991
Convert prefecture names to roman or kanji
convert_prefecture(x, to)
convert_prefecture(x, to)
x |
prefecture name in kanji |
to |
conversion destination |
convert_prefecture(c("tokyo-to", "osaka", "ALL"), to="kanji") convert_prefecture( c("\u6771\u4eac", "\u5927\u962a\u5e9c", "\u5317\u6d77\u9053", "\u5168\u56fd"), to = "roman")
convert_prefecture(c("tokyo-to", "osaka", "ALL"), to="kanji") convert_prefecture( c("\u6771\u4eac", "\u5927\u962a\u5e9c", "\u5317\u6d77\u9053", "\u5168\u56fd"), to = "roman")
Convert prefecture names from kana
convert_prefecture_from_kana(x)
convert_prefecture_from_kana(x)
x |
prefecture name in kana |
convert_prefecture_from_kana(c("\u3068\u3046\u304d\u3087\u3046\u3068")) convert_prefecture_from_kana(c("\u30c8\u30a6\u30ad\u30e7\u30a6\u30c8", "\u30ad\u30e7\u30a6\u30c8")) convert_prefecture_from_kana(c("\u30c8\u30a6\u30ad\u30e7\u30a6", "\u304a\u304a\u3055\u304b"))
convert_prefecture_from_kana(c("\u3068\u3046\u304d\u3087\u3046\u3068")) convert_prefecture_from_kana(c("\u30c8\u30a6\u30ad\u30e7\u30a6\u30c8", "\u30ad\u30e7\u30a6\u30c8")) convert_prefecture_from_kana(c("\u30c8\u30a6\u30ad\u30e7\u30a6", "\u304a\u304a\u3055\u304b"))
dl_zipcode_file(path, exdir = NULL)
dl_zipcode_file(path, exdir = NULL)
path |
local file path or zip file URL |
exdir |
The directory to extract zip file. If |
## Not run: dl_zipcode_file(path = "https://www.post.japanpost.jp/zipcode/dl/oogaki/zip/02aomori.zip") dl_zipcode_file("https://www.post.japanpost.jp/zipcode/dl/oogaki/zip/02aomori.zip", exdir = getwd()) ## End(Not run)
## Not run: dl_zipcode_file(path = "https://www.post.japanpost.jp/zipcode/dl/oogaki/zip/02aomori.zip") dl_zipcode_file("https://www.post.japanpost.jp/zipcode/dl/oogaki/zip/02aomori.zip", exdir = getwd()) ## End(Not run)
Get the date of the Xth occurrence of a specific weekday
find_date_by_wday(year, month, wday, ordinal)
find_date_by_wday(year, month, wday, ordinal)
year |
numeric year |
month |
numeric month |
wday |
numeric weekday |
ordinal |
number of week |
a vector of class POSIXct
find_date_by_wday(2021, 1, 2, 2)
find_date_by_wday(2021, 1, 2, 2)
harmonize_prefecture_name(x, to)
harmonize_prefecture_name(x, to)
x |
Input vector. |
to |
Option. Whether to use longer ("long") or shorter ("short") prefecture names. |
Convert with and without terminal notation, respectively.
long option, long formal name
Use the short option to omit the trailing characters
x <- c("\u6771\u4eac\u90fd", "\u5317\u6d77\u9053", "\u6c96\u7e04\u770c") harmonize_prefecture_name(x, to = "short") x <- c("\u6771\u4eac", "\u5317\u6d77\u9053", "\u6c96\u7e04") harmonize_prefecture_name(x, to = "long")
x <- c("\u6771\u4eac\u90fd", "\u5317\u6d77\u9053", "\u6c96\u7e04\u770c") harmonize_prefecture_name(x, to = "short") x <- c("\u6771\u4eac", "\u5317\u6d77\u9053", "\u6c96\u7e04") harmonize_prefecture_name(x, to = "long")
Whether it is a holiday defined by Japanese law (enacted in 1948)
is_jholiday(date)
is_jholiday(date)
date |
a vector of POSIXt, numeric or character objects |
Holiday information refers to data published as of December 21, 2020. Future holidays are subject to change.
TRUE if date is a public holiday in Japan, FALSE otherwise.
is_jholiday("2021-01-01") #> [1] TRUE is_jholiday("2018-12-23") #> [1] TRUE is_jholiday("2019-12-23") #> [1] FALSE
is_prefecture(x)
is_prefecture(x)
x |
Input vector. |
Check if the string is a prefectural string.
If it contains the name of the prefecture and other
strings (e.g. city name), it returns FALSE
.
logical
is_prefecture("\u6771\u4eac\u90fd") is_prefecture(c("\u6771\u4eac", "\u4eac\u90fd", "\u3064\u304f\u3070"))
is_prefecture("\u6771\u4eac\u90fd") is_prefecture(c("\u6771\u4eac", "\u4eac\u90fd", "\u3064\u304f\u3070"))
is_zipcode(x)
is_zipcode(x)
x |
Zip-code. Number or character. Hyphens may be included, but the input must contain a 7-digit number. |
A logical vector.
is_zipcode(7000027) is_zipcode("700-0027")
is_zipcode(7000027) is_zipcode("700-0027")
jholiday_spec(year, name, lang = "en") jholiday(year, lang = "en")
jholiday_spec(year, name, lang = "en") jholiday(year, lang = "en")
year |
numeric years after 1949.
If |
name |
holiday names. If this argument is not the same length as year, the first element will be recycled. |
lang |
switch for turning values to "en" or "jp". |
Holiday information refers to data published as of December 21, 2020. Future holidays are subject to change.
jholiday_spec(2019, "Sports Day") #> [1] "2019-10-14" jholiday_spec(2021, "Sports Day") #> [1] "2021-07-23"
List of holidays for a specific year
jholiday(2021, "en") #> $`New Year's Day` #> [1] "2021-01-01" #> #> $`Coming of Age Day` #> [1] "2021-01-11" #> #> $`Foundation Day` #> [1] "2021-02-11" #> #> $`The Emperor's Birthday` #> [1] "2021-02-23" #> #> $`Vernal Equinox Day` #> [1] "2021-03-20" #> #> $`Showa Day` #> [1] "2021-04-29" #> #> $`Constitution Memorial Day` #> [1] "2021-05-03" #> #> $`Greenery Day` #> [1] "2021-05-04" #> #> $`Children's Day` #> [1] "2021-05-05" #> #> $`Marine Day` #> [1] "2021-07-22" #> #> $`Sports Day` #> [1] "2021-07-23" #> #> $`Mountain Day` #> [1] "2021-08-08" #> #> $`Respect for the Aged Day` #> [1] "2021-09-20" #> #> $`Autumnal Equinox Day` #> [1] "2021-09-23" #> #> $`Culture Day` #> [1] "2021-11-03" #> #> $`Labour Thanksgiving Day` #> [1] "2021-11-23"
Public Holiday Law https://www8.cao.go.jp/chosei/shukujitsu/gaiyou.html, https://laws.e-gov.go.jp/document?lawid=323AC1000000178
Prefectures dataset.
jpnprefs
jpnprefs
A tibble with 47 rows and 5 variables:
jis_code: jis code
prefecture_kanji: prefecture names
prefecture: prefecture names
region: region
major_island:
jpnprefs
jpnprefs
Generates a vector consisting of the elements of kana. Options exist for the inclusion of several elements.
kana(type, ...) hiragana( core = TRUE, dakuon = FALSE, handakuon = FALSE, kogaki = FALSE, historical = FALSE ) katakana( core = TRUE, dakuon = FALSE, handakuon = FALSE, kogaki = FALSE, historical = FALSE )
kana(type, ...) hiragana( core = TRUE, dakuon = FALSE, handakuon = FALSE, kogaki = FALSE, historical = FALSE ) katakana( core = TRUE, dakuon = FALSE, handakuon = FALSE, kogaki = FALSE, historical = FALSE )
type |
"hiragana" ("hira") or "katakana" ("kata") |
... |
Arguments passed on to hiragana |
core |
Whether to include core kana characters. |
dakuon |
e.g. ga, gi, gu, ge, go |
handakuon |
e.g. pa, pi, pu, pe, po |
kogaki |
small character |
historical |
old style |
kana(type = "hira", core = TRUE) kana(type = "hira", core = TRUE, handakuon = TRUE)
kana(type = "hira", core = TRUE) kana(type = "hira", core = TRUE, handakuon = TRUE)
Converts a given Kansuji element such as Ichi (1) and Nana (7) to an Arabic numeral.
kansuji2arabic_all()
converts only Kansuji in the string.
kansuji2arabic_num()
converts kansuji that contain the positions (e.g. Hyaku,
Sen, etc) with the numbers represented by kansuji. kansuji2arabic_str()
converts kansuji in a string to numbers represented by kansuji while
retaining the non-kansuji characters.
kansuji2arabic(str, convert = TRUE, .under = Inf) kansuji2arabic_all(str, ...) kansuji2arabic_num(str, consecutive = c("convert", "non"), ...) kansuji2arabic_str( str, consecutive = c("convert", "non"), widths = c("all", "halfwidth"), ... )
kansuji2arabic(str, convert = TRUE, .under = Inf) kansuji2arabic_all(str, ...) kansuji2arabic_num(str, consecutive = c("convert", "non"), ...) kansuji2arabic_str( str, consecutive = c("convert", "non"), widths = c("all", "halfwidth"), ... )
str |
Input vector. |
convert |
If |
.under |
Number scale to be converted. The default value is infinity. |
... |
Other arguments to carry over to |
consecutive |
If you select "convert", any sequence of 1 to 9 kansuji will be replaced with Arabic numerals. If you select "non", any sequence of 1-9 kansuji will not be replaced by Arabic numerals. |
widths |
If you select "all", both full-width and half-width Arabic numerals are taken into account when calculating kansuji, but if you select "halfwidth", only half-width Arabic numerals are taken into account when calculating kansuji. |
a character or numeric.
kansuji2arabic("\u4e00") kansuji2arabic(c("\u4e00", "\u767e")) kansuji2arabic(c("\u4e00", "\u767e"), convert = FALSE) # Keep Kansuji over 1000. kansuji2arabic(c("\u4e00", "\u767e", "\u5343"), .under = 1000) # Convert all character kansuji2arabic_all("\u3007\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341") kansuji2arabic_all("\u516b\u4e01\u76ee") # Convert kansuji that contain the positions with the numbers represented by kansuji. kansuji2arabic_num("\u4e00\u5104\u4e8c\u5343\u4e09\u767e\u56db\u5341\u4e94\u4e07") kansuji2arabic_num("\u4e00\u5104\u4e8c\u4e09\u56db\u4e94\u4e07\u516d\u4e03\u516b\u4e5d") # Converts kansuji in a string to numbers represented by kansuji. kansuji2arabic_str("\u91d1\u4e00\u5104\u4e8c\u5343\u4e09\u767e\u56db\u5341\u4e94\u4e07\u5186") kansuji2arabic_str("\u91d1\u4e00\u5104\u4e8c\u4e09\u56db\u4e94\u4e07\u516d\u4e03\u516b\u4e5d\u5186") kansuji2arabic_str("\u91d11\u51042345\u4e076789\u5186")
kansuji2arabic("\u4e00") kansuji2arabic(c("\u4e00", "\u767e")) kansuji2arabic(c("\u4e00", "\u767e"), convert = FALSE) # Keep Kansuji over 1000. kansuji2arabic(c("\u4e00", "\u767e", "\u5343"), .under = 1000) # Convert all character kansuji2arabic_all("\u3007\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341") kansuji2arabic_all("\u516b\u4e01\u76ee") # Convert kansuji that contain the positions with the numbers represented by kansuji. kansuji2arabic_num("\u4e00\u5104\u4e8c\u5343\u4e09\u767e\u56db\u5341\u4e94\u4e07") kansuji2arabic_num("\u4e00\u5104\u4e8c\u4e09\u56db\u4e94\u4e07\u516d\u4e03\u516b\u4e5d") # Converts kansuji in a string to numbers represented by kansuji. kansuji2arabic_str("\u91d1\u4e00\u5104\u4e8c\u5343\u4e09\u767e\u56db\u5341\u4e94\u4e07\u5186") kansuji2arabic_str("\u91d1\u4e00\u5104\u4e8c\u4e09\u56db\u4e94\u4e07\u516d\u4e03\u516b\u4e5d\u5186") kansuji2arabic_str("\u91d11\u51042345\u4e076789\u5186")
Automatically scales and labels with the Kansuji Myriad Scale (e.g. "Man",
"Oku", etc).
Use label_kansuji()
to convert the label value to either a Kansuji value or a
mixture of Arabic numerals and the Kansuji Scales for ten thousands,
billions, and ten quadrillions.
Use label_kansuji_suffix()
to convert the label value to an Arabic numeral
followed by the Kansuji Scale with the suffix.
label_kansuji( unit = NULL, sep = "", prefix = "", big.mark = "", number = c("arabic", "kansuji"), ... ) label_kansuji_suffix( accuracy = 1, unit = NULL, sep = NULL, prefix = "", big.mark = "", significant.digits = FALSE, ... )
label_kansuji( unit = NULL, sep = "", prefix = "", big.mark = "", number = c("arabic", "kansuji"), ... ) label_kansuji_suffix( accuracy = 1, unit = NULL, sep = NULL, prefix = "", big.mark = "", significant.digits = FALSE, ... )
unit |
Optional units specifier. |
sep |
Separator between number and Kansuji unit. |
prefix |
Symbols to display before value. |
big.mark |
Character used between every 3 digits to separate thousands. |
number |
If Number is arabic, it will return a mixture of Arabic and the Kansuji Myriad Scale; if Kansuji, it will return only Kansuji numerals. |
... |
Other arguments passed on to |
accuracy |
A number to round to. Use (e.g.) 0.01 to show 2 decimal places of precision. |
significant.digits |
Determines whether or not the value of accuracy is
valid as a significant figure with a decimal point. The default is FALSE, in
which case if accuracy is 2 and the value is 1.10, 1.1 will be displayed,
but if TRUE and installed |
All label_()
functions return a "labelling" function, i.e. a function
that takes a vector x and returns a character vector of length(x) giving a
label for each input value.
library("scales") demo_continuous(c(1, 1e9), label = label_kansuji()) demo_continuous(c(1, 1e9), label = label_kansuji_suffix())
read_zipcode(path, type = c("oogaki", "kogaki", "roman", "jigyosyo"))
read_zipcode(path, type = c("oogaki", "kogaki", "roman", "jigyosyo"))
path |
local file path or zip file URL |
type |
Input file type, one of "oogaki", "kogaki", "roman", "jigyosyo" |
Reads zip-code data in CSV format provided by Japan Post Group and parses it as a data.frame. Corresponds to the available "oogaki", "kogaki", "roman" and "jigyosyo" types. The file type must be specified with the type argument.
https://www.post.japanpost.jp/zipcode/dl/readme.html, https://www.post.japanpost.jp/zipcode/dl/jigyosyo/readme.html
# Input sources read_zipcode(path = system.file("zipcode_dummy/13TOKYO_oogaki.CSV", package = "zipangu"), type = "oogaki") read_zipcode(system.file("zipcode_dummy/13TOKYO_kogaki.CSV", package = "zipangu"), "oogaki") read_zipcode(system.file("zipcode_dummy/KEN_ALL_ROME.CSV", package = "zipangu"), "roman") read_zipcode(system.file("zipcode_dummy/JIGYOSYO.CSV", package = "zipangu"), "jigyosyo") ## Not run: # Or directly from a URL read_zipcode("https://www.post.japanpost.jp/zipcode/dl/jigyosyo/zip/jigyosyo.zip") ## End(Not run)
# Input sources read_zipcode(path = system.file("zipcode_dummy/13TOKYO_oogaki.CSV", package = "zipangu"), type = "oogaki") read_zipcode(system.file("zipcode_dummy/13TOKYO_kogaki.CSV", package = "zipangu"), "oogaki") read_zipcode(system.file("zipcode_dummy/KEN_ALL_ROME.CSV", package = "zipangu"), "roman") read_zipcode(system.file("zipcode_dummy/JIGYOSYO.CSV", package = "zipangu"), "jigyosyo") ## Not run: # Or directly from a URL read_zipcode("https://www.post.japanpost.jp/zipcode/dl/jigyosyo/zip/jigyosyo.zip") ## End(Not run)
Parses and decomposes address string into elements of prefecture, city, and lower address.
separate_address(str)
separate_address(str)
str |
Input vector. address strings. |
A list of elements that make up an address.
separate_address("\u5317\u6d77\u9053\u672d\u5e4c\u5e02\u4e2d\u592e\u533a")
separate_address("\u5317\u6d77\u9053\u672d\u5e4c\u5e02\u4e2d\u592e\u533a")
str_jconv(str, fun, to) str_conv_hirakana(str, to = c("hiragana", "katakana")) str_conv_zenhan(str, to = c("zenkaku", "hankaku")) str_conv_romanhira(str, to = c("roman", "hiragana")) str_conv_normalize(str, to = c("nfkc"))
str_jconv(str, fun, to) str_conv_hirakana(str, to = c("hiragana", "katakana")) str_conv_zenhan(str, to = c("zenkaku", "hankaku")) str_conv_romanhira(str, to = c("roman", "hiragana")) str_conv_normalize(str, to = c("nfkc"))
str |
Input vector. |
fun |
convert function |
to |
Select the type of character to convert. |
Converts between the character types used in Japanese text. The following conversions are supported.
Hiragana to Katakana
Zenkaku to Hankaku
Latin (Roman) to Hiragana
These functions are powered by the stringi package's stri_trans_general().
str_jconv("\u30a2\u30a4\u30a6\u30a8\u30aa", str_conv_hirakana, to = "hiragana") str_jconv("\u3042\u3044\u3046\u3048\u304a", str_conv_hirakana, to = "katakana") str_jconv("\uff41\uff10", str_conv_zenhan, "hankaku") str_jconv("\uff76\uff9e\uff6f", str_conv_zenhan, "zenkaku") str_jconv("\u30a2\u30a4\u30a6\u30a8\u30aa", str_conv_romanhira, "roman") str_jconv("\u2460", str_conv_normalize, "nfkc") str_conv_hirakana("\u30a2\u30a4\u30a6\u30a8\u30aa", to = "hiragana") str_conv_hirakana("\u3042\u3044\u3046\u3048\u304a", to = "katakana") str_conv_zenhan("\uff41\uff10", "hankaku") str_conv_zenhan("\uff76\uff9e\uff6f", "zenkaku") str_conv_romanhira("aiueo", "hiragana") str_conv_romanhira("\u3042\u3044\u3046\u3048\u304a", "roman") str_conv_normalize("\u2460", "nfkc")
str_jconv("\u30a2\u30a4\u30a6\u30a8\u30aa", str_conv_hirakana, to = "hiragana") str_jconv("\u3042\u3044\u3046\u3048\u304a", str_conv_hirakana, to = "katakana") str_jconv("\uff41\uff10", str_conv_zenhan, "hankaku") str_jconv("\uff76\uff9e\uff6f", str_conv_zenhan, "zenkaku") str_jconv("\u30a2\u30a4\u30a6\u30a8\u30aa", str_conv_romanhira, "roman") str_jconv("\u2460", str_conv_normalize, "nfkc") str_conv_hirakana("\u30a2\u30a4\u30a6\u30a8\u30aa", to = "hiragana") str_conv_hirakana("\u3042\u3044\u3046\u3048\u304a", to = "katakana") str_conv_zenhan("\uff41\uff10", "hankaku") str_conv_zenhan("\uff76\uff9e\uff6f", "zenkaku") str_conv_romanhira("aiueo", "hiragana") str_conv_romanhira("\u3042\u3044\u3046\u3048\u304a", "roman") str_conv_normalize("\u2460", "nfkc")
Converts characters following the rules of 'neologd'
str_jnormalize(str)
str_jnormalize(str)
str |
Input vector. |
Converts the characters into a normalized style based on the rules recommended by the Neologism dictionary for MeCab.
a character
https://github.com/neologd/mecab-ipadic-neologd/wiki/Regexp.ja
str_jnormalize( paste0( " \uff30", "\uff32\uff2d\uff2c\u300 \u526f \u8aad \u672c " ) ) str_jnormalize( paste0( "\u5357\u30a2\u30eb\u30d7\u30b9\u306e\u3000\u5929\u7136\u6c34", "-\u3000\uff33\uff50\uff41\uff52\uff4b\uff49\uff4e\uff47\u3000", "\uff2c\uff45\uff4d\uff4f\uff4e\u3000\u30ec\u30e2\u30f3\u4e00\u7d5e\u308a" ) )
str_jnormalize( paste0( " \uff30", "\uff32\uff2d\uff2c\u300 \u526f \u8aad \u672c " ) ) str_jnormalize( paste0( "\u5357\u30a2\u30eb\u30d7\u30b9\u306e\u3000\u5929\u7136\u6c34", "-\u3000\uff33\uff50\uff41\uff52\uff4b\uff49\uff4e\uff47\u3000", "\uff2c\uff45\uff4d\uff4f\uff4e\u3000\u30ec\u30e2\u30f3\u4e00\u7d5e\u308a" ) )
Inserts a hyphen as a delimiter in the given zip-code string, or removes the hyphen.
zipcode_spacer(x, remove = FALSE)
zipcode_spacer(x, remove = FALSE)
x |
Zip-code. Number or character. Hyphens may be included, but the input must contain a 7-digit number. |
remove |
Default is |
zipcode_spacer(7000027) zipcode_spacer("305-0053") zipcode_spacer("305-0053", remove = TRUE)
zipcode_spacer(7000027) zipcode_spacer("305-0053") zipcode_spacer("305-0053", remove = TRUE)