{"id":1530,"date":"2021-09-07T13:13:42","date_gmt":"2021-09-07T04:13:42","guid":{"rendered":"https:\/\/tech.at-iroha.jp\/?p=1530"},"modified":"2021-09-10T14:16:21","modified_gmt":"2021-09-10T05:16:21","slug":"%e8%87%aa%e7%84%b6%e8%a8%80%e8%aa%9e%e5%87%a6%e7%90%86%ef%bc%9amecab-%e3%81%a8-python-%e3%81%ab%e3%82%88%e3%82%8b%e3%83%af%e3%83%bc%e3%83%89%e3%82%af%e3%83%a9%e3%82%a6%e3%83%89%e4%bd%9c%e6%88%90","status":"publish","type":"post","link":"https:\/\/tech.at-iroha.jp\/?p=1530","title":{"rendered":"\u81ea\u7136\u8a00\u8a9e\u51e6\u7406\uff1aMeCab \u3068 Python \u306b\u3088\u308b\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u4f5c\u6210"},"content":{"rendered":"\n<p>\u6700\u8fd1\u3001\u7814\u7a76\u76ee\u7684\u3067\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u4f5c\u6210\u7528\u306e\u7c21\u6613\u7684\u306a WEB-API \u3092\u5b9f\u88c5\u3059\u308b\u6a5f\u4f1a\u304c\u3042\u3063\u305f\u305f\u3081\u3001\u624b\u9806\u3092\u307e\u3068\u3081\u3066\u307f\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u74b0\u5883\u69cb\u7bc9\uff08Ubuntu \/ MeCab \/ WordCloud\uff09<\/h2>\n\n\n\n<p>AWS \u306e EC2 (Ubuntu 20) \u4e0a\u306b\u4ee5\u4e0b\u306e\u624b\u9806\u3067\u74b0\u5883\u3092\u69cb\u7bc9\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>pip \uff08Python \u7528\u30d1\u30c3\u30b1\u30fc\u30b8\u30de\u30cd\u30fc\u30b8\u30e3\uff09\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sudo apt-get update\nsudo apt install python3-pip<\/code><\/pre>\n\n\n\n<p>WordCloud\uff08Python \u7528\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u4f5c\u6210\u30e9\u30a4\u30d6\u30e9\u30ea\uff09\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sudo pip3 install wordcloud<\/code><\/pre>\n\n\n\n<p>MeCab \u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sudo apt install mecab\nsudo apt install libmecab-dev\nsudo apt install 
mecab-ipadic-utf8<\/code><\/pre>\n\n\n\n<p>Python\u7528\u306e MeCab \u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sudo pip3 install mecab-python3\nsudo pip3 install unidic-lite<\/code><\/pre>\n\n\n\n<p>\u65e5\u672c\u8a9e\u30d5\u30a9\u30f3\u30c8\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sudo apt install fontconfig\nsudo apt install fonts-ipaexfont<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u4f5c\u6210\u7528\u306e\u30b9\u30af\u30ea\u30d7\u30c8\u306e\u4f5c\u6210<\/h2>\n\n\n\n<p>wc_test.py \u3068\u3044\u3046\u30d5\u30a1\u30a4\u30eb\u540d\u3067\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u4f5c\u6210\u7528\u306e\u30b9\u30af\u30ea\u30d7\u30c8\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import MeCab\n\nfrom wordcloud import WordCloud\n\nFILE_NAME = \"sample.txt\"\n\nwith open(FILE_NAME, \"r\", encoding=\"utf-8\") as f:\n    text = f.read()\n\nstop_words = &#91;\n\t'\u3042','\u3044','\u3046','\u3048','\u304a',\n\t'\u304b','\u304d','\u304f','\u3051','\u3053',\n\t'\u3055','\u3057','\u3059','\u305b','\u305d',\n\t'\u305f','\u3061','\u3064','\u3066','\u3068',\n\t'\u306a','\u306b','\u306c','\u306d','\u306e',\n\t'\u306f','\u3072','\u3075','\u3078','\u307b',\n\t'\u307e','\u307f','\u3080','\u3081','\u3082',\n\t'\u3084','\u3086','\u3088',\n\t'\u3089','\u308a','\u308b','\u308c','\u308d',\n\t'\u308f','\u3092','\u3093',\n\t'\u304c','\u304e','\u3050','\u3052','\u3054',\n\t'\u3056','\u3058','\u305a','\u305c','\u305e',\n\t'\u3060','\u3062','\u3065','\u3067','\u3069',\n\t'\u3060','\u3062','\u3065','\u3067','\u3069',\n\t'\u3059\u308b', '\u3044\u308b', '\u3042\u308b', '\u306a\u3044', '\u304a\u308b',\n\t'\u3082\u306e', '\u3044\u3046', '\u305d\u3046', '\u306a\u308b', 
'\u898b\u308b',\n\t''\n]\n\n#MeCab \u3092\u4f7f\u7528\u3057\u3066\u5f62\u614b\u7d20\u89e3\u6790\nmecab = MeCab.Tagger(\"-O chasen -d \/var\/lib\/mecab\/dic\/ipadic-utf8\/\")\nnode  = mecab.parseToNode(text)\nwords = &#91;]\n\n#\u540d\u8a5e\u3001\u5f62\u5bb9\u8a5e\u3001\u52d5\u8a5e\u3067\u3042\u308b\u5358\u8a9e\u306e\u307f\u3092\u62bd\u51fa\nwhile node:\n    if node.feature.split(\",\")&#91;0] == u\"\u540d\u8a5e\":\n        words.append(node.surface)\n    elif node.feature.split(\",\")&#91;0] == u\"\u5f62\u5bb9\u8a5e\":\n        words.append(node.feature.split(\",\")&#91;6])\n    elif node.feature.split(\",\")&#91;0] == u\"\u52d5\u8a5e\":\n        words.append(node.feature.split(\",\")&#91;6])\n    node = node.next\n\n#\u5358\u8a9e\u3092\u7a7a\u767d\u3067\u7d50\u5408\ntext = ' '.join(words);\n\n#\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u3092\u4f5c\u6210\nwordcloud = WordCloud(\n\twidth = 400,  # \u5e45\n\theight = 300,  # \u9ad8\u3055\n\tbackground_color = 'white', # \u80cc\u666f\u8272\n\tfont_path = '\/usr\/share\/fonts\/truetype\/fonts-japanese-gothic.ttf', # \u65e5\u672c\u8a9e\u30d5\u30a9\u30f3\u30c8\u3092\u6307\u5b9a\n\tstopwords = set(stop_words), # \u51fa\u529b\u304b\u3089\u9664\u5916\u3059\u308b\u5358\u8a9e\n)\n\nwordcloud.generate(text)\nwordcloud.to_file(\"wordcloud.png\")\n<\/code><\/pre>\n\n\n\n<p>sample.txt \u3068\u3044\u3046\u30d5\u30a1\u30a4\u30eb\u540d\u3067\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30b5\u30f3\u30d7\u30eb\u7528\u306e\u30c6\u30ad\u30b9\u30c8\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre 
class=\"wp-block-code\"><code>\u543e\u8f29\u308f\u304c\u306f\u3044\u306f\u732b\u3067\u3042\u308b\u3002\u540d\u524d\u306f\u307e\u3060\u7121\u3044\u3002\n\u3000\u3069\u3053\u3067\u751f\u308c\u305f\u304b\u3068\u3093\u3068\u898b\u5f53\u3051\u3093\u3068\u3046\u304c\u3064\u304b\u306c\u3002\u4f55\u3067\u3082\u8584\u6697\u3044\u3058\u3081\u3058\u3081\u3057\u305f\u6240\u3067\u30cb\u30e3\u30fc\u30cb\u30e3\u30fc\u6ce3\u3044\u3066\u3044\u305f\u4e8b\u3060\u3051\u306f\u8a18\u61b6\u3057\u3066\u3044\u308b\u3002\u543e\u8f29\u306f\u3053\u3053\u3067\u59cb\u3081\u3066\u4eba\u9593\u3068\u3044\u3046\u3082\u306e\u3092\u898b\u305f\u3002\u3057\u304b\u3082\u3042\u3068\u3067\u805e\u304f\u3068\u305d\u308c\u306f\u66f8\u751f\u3068\u3044\u3046\u4eba\u9593\u4e2d\u3067\u4e00\u756a\u7370\u60aa\u3069\u3046\u3042\u304f\u306a\u7a2e\u65cf\u3067\u3042\u3063\u305f\u305d\u3046\u3060\u3002\u3053\u306e\u66f8\u751f\u3068\u3044\u3046\u306e\u306f\u6642\u3005\u6211\u3005\u3092\u6355\u3064\u304b\u307e\u3048\u3066\u716e\u306b\u3066\u98df\u3046\u3068\u3044\u3046\u8a71\u3067\u3042\u308b\u3002\u3057\u304b\u3057\u305d\u306e\u5f53\u6642\u306f\u4f55\u3068\u3044\u3046\u8003\u3082\u306a\u304b\u3063\u305f\u304b\u3089\u5225\u6bb5\u6050\u3057\u3044\u3068\u3082\u601d\u308f\u306a\u304b\u3063\u305f\u3002\u305f\u3060\u5f7c\u306e\u638c\u3066\u306e\u3072\u3089\u306b\u8f09\u305b\u3089\u308c\u3066\u30b9\u30fc\u3068\u6301\u3061\u4e0a\u3052\u3089\u308c\u305f\u6642\u4f55\u3060\u304b\u30d5\u30ef\u30d5\u30ef\u3057\u305f\u611f\u3058\u304c\u3042\u3063\u305f\u3070\u304b\u308a\u3067\u3042\u308b\u3002\u638c\u306e\u4e0a\u3067\u5c11\u3057\u843d\u3061\u3064\u3044\u3066\u66f8\u751f\u306e\u9854\u3092\u898b\u305f\u306e\u304c\u3044\u308f\u3086\u308b\u4eba\u9593\u3068\u3044\u3046\u3082\u306e\u306e\u898b\u59cb\u307f\u306f\u3058\u3081\u3067\u3042\u308d\u3046\u3002\u3053\u306e\u6642\u5999\u306a\u3082\u306e\u3060\u3068\u601d\u3063\u305f\u611f\u3058\u304c\u4eca\u3067\u3082\u6b8b\u3063\u3066\u3044\u308b\u3002\u7b2c\u4e00\u6bdb\u3092\u3082\u3063\u3066\u88c5
\u98fe\u3055\u308c\u3079\u304d\u306f\u305a\u306e\u9854\u304c\u3064\u308b\u3064\u308b\u3057\u3066\u307e\u308b\u3067\u85ac\u7f36\u3084\u304b\u3093\u3060\u3002\u305d\u306e\u5f8c\u3054\u732b\u306b\u3082\u3060\u3044\u3076\u9022\u3042\u3063\u305f\u304c\u3053\u3093\u306a\u7247\u8f2a\u304b\u305f\u308f\u306b\u306f\u4e00\u5ea6\u3082\u51fa\u4f1a\u3067\u304f\u308f\u3057\u305f\u4e8b\u304c\u306a\u3044\u3002\u306e\u307f\u306a\u3089\u305a\u9854\u306e\u771f\u4e2d\u304c\u3042\u307e\u308a\u306b\u7a81\u8d77\u3057\u3066\u3044\u308b\u3002\u305d\u3046\u3057\u3066\u305d\u306e\u7a74\u306e\u4e2d\u304b\u3089\u6642\u3005\u3077\u3046\u3077\u3046\u3068\u7159\u3051\u3080\u308a\u3092\u5439\u304f\u3002\u3069\u3046\u3082\u54bd\u3080\u305b\u307d\u304f\u3066\u5b9f\u306b\u5f31\u3063\u305f\u3002\u3053\u308c\u304c\u4eba\u9593\u306e\u98f2\u3080\u7159\u8349\u305f\u3070\u3053\u3068\u3044\u3046\u3082\u306e\u3067\u3042\u308b\u4e8b\u306f\u3088\u3046\u3084\u304f\u3053\u306e\u9803\u77e5\u3063\u305f\u3002\n\u3000\u3053\u306e\u66f8\u751f\u306e\u638c\u306e\u88cf\u3046\u3061\u3067\u3057\u3070\u3089\u304f\u306f\u3088\u3044\u5fc3\u6301\u306b\u5750\u3063\u3066\u304a\u3063\u305f\u304c\u3001\u3057\u3070\u3089\u304f\u3059\u308b\u3068\u975e\u5e38\u306a\u901f\u529b\u3067\u904b\u8ee2\u3057\u59cb\u3081\u305f\u3002\u66f8\u751f\u304c\u52d5\u304f\u306e\u304b\u81ea\u5206\u3060\u3051\u304c\u52d5\u304f\u306e\u304b\u5206\u3089\u306a\u3044\u304c\u7121\u6697\u3080\u3084\u307f\u306b\u773c\u304c\u5efb\u308b\u3002\u80f8\u304c\u60aa\u304f\u306a\u308b\u3002\u5230\u5e95\u3068\u3046\u3066\u3044\u52a9\u304b\u3089\u306a\u3044\u3068\u601d\u3063\u3066\u3044\u308b\u3068\u3001\u3069\u3055\u308a\u3068\u97f3\u304c\u3057\u3066\u773c\u304b\u3089\u706b\u304c\u51fa\u305f\u3002\u305d\u308c\u307e\u3067\u306f\u8a18\u61b6\u3057\u3066\u3044\u308b\u304c\u3042\u3068\u306f\u4f55\u306e\u4e8b\u3084\u3089\u3044\u304f\u3089\u8003\u3048\u51fa\u305d\u3046\u3068\u3057\u3066\u3082\u5206\u3089\u306a\u3044\u3002\n\u3000\u3075\u3068\u6c17\u304c\u4ed8\u3044\u3066\u898b\u30
8b\u3068\u66f8\u751f\u306f\u3044\u306a\u3044\u3002\u305f\u304f\u3055\u3093\u304a\u3063\u305f\u5144\u5f1f\u304c\u4e00\u758b\u3074\u304d\u3082\u898b\u3048\u306c\u3002\u809d\u5fc3\u304b\u3093\u3058\u3093\u306e\u6bcd\u89aa\u3055\u3048\u59ff\u3092\u96a0\u3057\u3066\u3057\u307e\u3063\u305f\u3002\u305d\u306e\u4e0a\u4eca\u3044\u307e\u307e\u3067\u306e\u6240\u3068\u306f\u9055\u3063\u3066\u7121\u6697\u3080\u3084\u307f\u306b\u660e\u308b\u3044\u3002\u773c\u3092\u660e\u3044\u3066\u3044\u3089\u308c\u306c\u304f\u3089\u3044\u3060\u3002\u306f\u3066\u306a\u4f55\u3067\u3082\u5bb9\u5b50\u3088\u3046\u3059\u304c\u304a\u304b\u3057\u3044\u3068\u3001\u306e\u305d\u306e\u305d\u9019\u306f\u3044\u51fa\u3057\u3066\u898b\u308b\u3068\u975e\u5e38\u306b\u75db\u3044\u3002\u543e\u8f29\u306f\u85c1\u308f\u3089\u306e\u4e0a\u304b\u3089\u6025\u306b\u7b39\u539f\u306e\u4e2d\u3078\u68c4\u3066\u3089\u308c\u305f\u306e\u3067\u3042\u308b\u3002\n\u3000\u3088\u3046\u3084\u304f\u306e\u601d\u3044\u3067\u7b39\u539f\u3092\u9019\u3044\u51fa\u3059\u3068\u5411\u3046\u306b\u5927\u304d\u306a\u6c60\u304c\u3042\u308b\u3002\u543e\u8f29\u306f\u6c60\u306e\u524d\u306b\u5750\u3063\u3066\u3069\u3046\u3057\u305f\u3089\u3088\u304b\u308d\u3046\u3068\u8003\u3048\u3066\u898b\u305f\u3002\u5225\u306b\u3053\u308c\u3068\u3044\u3046\u5206\u5225\u3075\u3093\u3079\u3064\u3082\u51fa\u306a\u3044\u3002\u3057\u3070\u3089\u304f\u3057\u3066\u6ce3\u3044\u305f\u3089\u66f8\u751f\u304c\u307e\u305f\u8fce\u306b\u6765\u3066\u304f\u308c\u308b\u304b\u3068\u8003\u3048\u4ed8\u3044\u305f\u3002\u30cb\u30e3\u30fc\u3001\u30cb\u30e3\u30fc\u3068\u8a66\u307f\u306b\u3084\u3063\u3066\u898b\u305f\u304c\u8ab0\u3082\u6765\u306a\u3044\u3002\u305d\u306e\u3046\u3061\u6c60\u306e\u4e0a\u3092\u3055\u3089\u3055\u3089\u3068\u98a8\u304c\u6e21\u3063\u3066\u65e5\u304c\u66ae\u308c\u304b\u304b\u308b\u3002\u8179\u304c\u975e\u5e38\u306b\u6e1b\u3063\u3066\u6765\u305f\u3002\u6ce3\u304d\u305f\u304f\u3066\u3082\u58f0\u304c\u51fa\u306a\u3044\u3002\u4ed5\u65b9\u304c\u306a\u3044\u3001\u4f55\u30
67\u3082\u3088\u3044\u304b\u3089\u98df\u7269\u304f\u3044\u3082\u306e\u306e\u3042\u308b\u6240\u307e\u3067\u3042\u308b\u3053\u3046\u3068\u6c7a\u5fc3\u3092\u3057\u3066\u305d\u308d\u308a\u305d\u308d\u308a\u3068\u6c60\u3092\u5de6\u3072\u3060\u308a\u306b\u5efb\u308a\u59cb\u3081\u305f\u3002\u3069\u3046\u3082\u975e\u5e38\u306b\u82e6\u3057\u3044\u3002\u305d\u3053\u3092\u6211\u6162\u3057\u3066\u7121\u7406\u3084\u308a\u306b\u9019\u306f\u3063\u3066\u884c\u304f\u3068\u3088\u3046\u3084\u304f\u306e\u4e8b\u3067\u4f55\u3068\u306a\u304f\u4eba\u9593\u81ed\u3044\u6240\u3078\u51fa\u305f\u3002\u3053\u3053\u3078\u9019\u5165\u306f\u3044\u3063\u305f\u3089\u3001\u3069\u3046\u306b\u304b\u306a\u308b\u3068\u601d\u3063\u3066\u7af9\u57a3\u306e\u5d29\u304f\u305a\u308c\u305f\u7a74\u304b\u3089\u3001\u3068\u3042\u308b\u90b8\u5185\u306b\u3082\u3050\u308a\u8fbc\u3093\u3060\u3002\u7e01\u306f\u4e0d\u601d\u8b70\u306a\u3082\u306e\u3067\u3001\u3082\u3057\u3053\u306e\u7af9\u57a3\u304c\u7834\u308c\u3066\u3044\u306a\u304b\u3063\u305f\u306a\u3089\u3001\u543e\u8f29\u306f\u3064\u3044\u306b\u8def\u508d\u308d\u307c\u3046\u306b\u9913\u6b7b\u304c\u3057\u3057\u305f\u304b\u3082\u77e5\u308c\u3093\u306e\u3067\u3042\u308b\u3002\u4e00\u6a39\u306e\u852d\u3068\u306f\u3088\u304f\u4e91\u3044\u3063\u305f\u3082\u306e\u3060\u3002\u3053\u306e\u57a3\u6839\u306e\u7a74\u306f\u4eca\u65e5\u3053\u3093\u306b\u3061\u306b\u81f3\u308b\u307e\u3067\u543e\u8f29\u304c\u96a3\u5bb6\u3068\u306a\u308a\u306e\u4e09\u6bdb\u3092\u8a2a\u554f\u3059\u308b\u6642\u306e\u901a\u8def\u306b\u306a\u3063\u3066\u3044\u308b\u3002\u3055\u3066\u90b8\u3084\u3057\u304d\u3078\u306f\u5fcd\u3073\u8fbc\u3093\u3060\u3082\u306e\u306e\u3053\u308c\u304b\u3089\u5148\u3069\u3046\u3057\u3066\u5584\u3044\u3044\u304b\u5206\u3089\u306a\u3044\u3002\u305d\u306e\u3046\u3061\u306b\u6697\u304f\u306a\u308b\u3001\u8179\u306f\u6e1b\u308b\u3001\u5bd2\u3055\u306f\u5bd2\u3057\u3001\u96e8\u304c\u964d\u3063\u3066\u6765\u308b\u3068\u3044\u3046\u59cb\u672b\u3067\u3082\u3046\u4e00\u523b\u306e\u7336
\u4e88\u3086\u3046\u3088\u304c\u51fa\u6765\u306a\u304f\u306a\u3063\u305f\u3002\u4ed5\u65b9\u304c\u306a\u3044\u304b\u3089\u3068\u306b\u304b\u304f\u660e\u308b\u304f\u3066\u6696\u304b\u305d\u3046\u306a\u65b9\u3078\u65b9\u3078\u3068\u3042\u308b\u3044\u3066\u884c\u304f\u3002\u4eca\u304b\u3089\u8003\u3048\u308b\u3068\u305d\u306e\u6642\u306f\u3059\u3067\u306b\u5bb6\u306e\u5185\u306b\u9019\u5165\u3063\u3066\u304a\u3063\u305f\u306e\u3060\u3002\u3053\u3053\u3067\u543e\u8f29\u306f\u5f7c\u304b\u306e\u66f8\u751f\u4ee5\u5916\u306e\u4eba\u9593\u3092\u518d\u3073\u898b\u308b\u3079\u304d\u6a5f\u4f1a\u306b\u906d\u9047\u305d\u3046\u3050\u3046\u3057\u305f\u306e\u3067\u3042\u308b\u3002\u7b2c\u4e00\u306b\u9022\u3063\u305f\u306e\u304c\u304a\u3055\u3093\u3067\u3042\u308b\u3002\u3053\u308c\u306f\u524d\u306e\u66f8\u751f\u3088\u308a\u4e00\u5c64\u4e71\u66b4\u306a\u65b9\u3067\u543e\u8f29\u3092\u898b\u308b\u3084\u5426\u3084\u3044\u304d\u306a\u308a\u9838\u7b4b\u304f\u3073\u3059\u3058\u3092\u3064\u304b\u3093\u3067\u8868\u3078\u629b\u307b\u3046\u308a\u51fa\u3057\u305f\u3002\u3044\u3084\u3053\u308c\u306f\u99c4\u76ee\u3060\u3068\u601d\u3063\u305f\u304b\u3089\u773c\u3092\u306d\u3076\u3063\u3066\u904b\u3092\u5929\u306b\u4efb\u305b\u3066\u3044\u305f\u3002\u3057\u304b\u3057\u3072\u3082\u3058\u3044\u306e\u3068\u5bd2\u3044\u306e\u306b\u306f\u3069\u3046\u3057\u3066\u3082\u6211\u6162\u304c\u51fa\u6765\u3093\u3002\u543e\u8f29\u306f\u518d\u3073\u304a\u3055\u3093\u306e\u9699\u3059\u304d\u3092\u898b\u3066\u53f0\u6240\u3078\u9019\u306f\u3044\u4e0a\u3042\u304c\u3063\u305f\u3002\u3059\u308b\u3068\u9593\u3082\u306a\u304f\u307e\u305f\u6295\u3052\u51fa\u3055\u308c\u305f\u3002\u543e\u8f29\u306f\u6295\u3052\u51fa\u3055\u308c\u3066\u306f\u9019\u3044\u4e0a\u308a\u3001\u9019\u3044\u4e0a\u3063\u3066\u306f\u6295\u3052\u51fa\u3055\u308c\u3001\u4f55\u3067\u3082\u540c\u3058\u4e8b\u3092\u56db\u4e94\u904d\u7e70\u308a\u8fd4\u3057\u305f\u306e\u3092\u8a18\u61b6\u3057\u3066\u3044\u308b\u3002\u305d\u306e\u6642\u306b\u304a\u3055\u3093\u
3068\u4e91\u3046\u8005\u306f\u3064\u304f\u3065\u304f\u3044\u3084\u306b\u306a\u3063\u305f\u3002\u3053\u306e\u9593\u304a\u3055\u3093\u306e\u4e09\u99ac\u3055\u3093\u307e\u3092\u5078\u306c\u3059\u3093\u3067\u3053\u306e\u8fd4\u5831\u3092\u3057\u3066\u3084\u3063\u3066\u304b\u3089\u3001\u3084\u3063\u3068\u80f8\u306e\u75de\u3064\u304b\u3048\u304c\u4e0b\u308a\u305f\u3002\u543e\u8f29\u304c\u6700\u5f8c\u306b\u3064\u307e\u307f\u51fa\u3055\u308c\u3088\u3046\u3068\u3057\u305f\u3068\u304d\u306b\u3001\u3053\u306e\u5bb6\u3046\u3061\u306e\u4e3b\u4eba\u304c\u9a12\u3005\u3057\u3044\u4f55\u3060\u3068\u3044\u3044\u306a\u304c\u3089\u51fa\u3066\u6765\u305f\u3002\u4e0b\u5973\u306f\u543e\u8f29\u3092\u3076\u3089\u4e0b\u3052\u3066\u4e3b\u4eba\u306e\u65b9\u3078\u5411\u3051\u3066\u3053\u306e\u5bbf\u3084\u3069\u306a\u3057\u306e\u5c0f\u732b\u304c\u3044\u304f\u3089\u51fa\u3057\u3066\u3082\u51fa\u3057\u3066\u3082\u5fa1\u53f0\u6240\u304a\u3060\u3044\u3069\u3053\u308d\u3078\u4e0a\u3042\u304c\u3063\u3066\u6765\u3066\u56f0\u308a\u307e\u3059\u3068\u3044\u3046\u3002\u4e3b\u4eba\u306f\u9f3b\u306e\u4e0b\u306e\u9ed2\u3044\u6bdb\u3092\u649a\u3072\u306d\u308a\u306a\u304c\u3089\u543e\u8f29\u306e\u9854\u3092\u3057\u3070\u3089\u304f\u773a\u306a\u304c\u3081\u3066\u304a\u3063\u305f\u304c\u3001\u3084\u304c\u3066\u305d\u3093\u306a\u3089\u5185\u3078\u7f6e\u3044\u3066\u3084\u308c\u3068\u3044\u3063\u305f\u307e\u307e\u5965\u3078\u9019\u5165\u306f\u3044\u3063\u3066\u3057\u307e\u3063\u305f\u3002\u4e3b\u4eba\u306f\u3042\u307e\u308a\u53e3\u3092\u805e\u304b\u306c\u4eba\u3068\u898b\u3048\u305f\u3002\u4e0b\u5973\u306f\u53e3\u60dc\u304f\u3084\u3057\u305d\u3046\u306b\u543e\u8f29\u3092\u53f0\u6240\u3078\u629b\u307b\u3046\u308a\u51fa\u3057\u305f\u3002\u304b\u304f\u3057\u3066\u543e\u8f29\u306f\u3064\u3044\u306b\u3053\u306e\u5bb6\u3046\u3061\u3092\u81ea\u5206\u306e\u4f4f\u5bb6\u3059\u307f\u304b\u3068\u6975\u304d\u3081\u308b\u4e8b\u306b\u3057\u305f\u306e\u3067\u3042\u308b\u3002<\/code><\/pre>\n\n\n\n<p>\u5b9f\u884c\u3088\u3046\u306b\u
30b9\u30af\u30ea\u30d7\u30c8\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u306e\u753b\u50cf\u30d5\u30a1\u30a4\u30eb\u304c\u751f\u6210\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>python3 wc_test.py<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"800\" height=\"300\" src=\"https:\/\/tech.at-iroha.jp\/wp-content\/uploads\/2021\/09\/wordcloud-2.png\" alt=\"\" class=\"wp-image-1535\" srcset=\"https:\/\/tech.at-iroha.jp\/wp-content\/uploads\/2021\/09\/wordcloud-2.png 800w, https:\/\/tech.at-iroha.jp\/wp-content\/uploads\/2021\/09\/wordcloud-2-700x263.png 700w, https:\/\/tech.at-iroha.jp\/wp-content\/uploads\/2021\/09\/wordcloud-2-768x288.png 768w\" sizes=\"auto, (max-width: 800px) 100vw, 800px\" \/><\/figure>\n\n\n\n<p>\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u306f\u5358\u8a9e\u306e\u91cd\u307f\u304c\u8003\u616e\u3055\u308c\u3066\u3044\u306a\u3044\u70ba\u3001\u4e00\u822c\u7684\u306a\u5358\u8a9e\u3084\u610f\u56f3\u3057\u306a\u3044\u8a18\u53f7\u304c\u62bd\u51fa\u3055\u308c\u308b\u3053\u3068\u304c\u591a\u304f\u3042\u308a\u307e\u3059\u3002\u305d\u306e\u305f\u3081\u9664\u5916\u3059\u308b\u5358\u8a9e\u30ea\u30b9\u30c8\uff08 stopwords \uff09\u3092\u3046\u307e\u304f\u6d3b\u7528\u3057\u3066\u30c7\u30fc\u30bf\u306e\u30af\u30ec\u30f3\u30b8\u30f3\u30b0\u3092\u884c\u3046\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u30c6\u30ad\u30b9\u30c8\u304b\u3089\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u3092\u4f5c\u6210\u3059\u308b\u7c21\u6613\u7684\u306a WEB-API \u306e\u4f5c\u6210<\/h2>\n\n\n\n<p>Apache 
\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30b3\u30fc\u30c9\u3092\u4f5c\u6210\u3059\u308b\u3053\u3068\u3067\u3001POST\u3055\u308c\u305f\u30c6\u30ad\u30b9\u30c8\u3092\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u306b\u5909\u63db\u3059\u308bAPI\u306e\u4f5c\u6210\u3082\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<p>Apache \u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sudo apt install apache2<\/code><\/pre>\n\n\n\n<p>\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306e\u6240\u6709\u8005\u3092\u5909\u66f4\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sudo chown -R $USER:$USER \/var\/www\/\nsudo chown -R $USER:$USER \/usr\/lib\/cgi-bin\/\nsudo chown -R $USER:$USER \/etc\/apache2\/sites-available\/\n<\/code><\/pre>\n\n\n\n<p>\u4ee5\u4e0b\u306e\u5185\u5bb9\u3067 \/etc\/apache2\/conf-available\/cgi-enabled.conf \u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>&lt;Directory \"\/usr\/lib\/cgi-bin\"&gt;\n    Options +ExecCGI\n    AddHandler cgi-script .cgi .py\n&lt;\/Directory&gt;\n<\/code><\/pre>\n\n\n\n<p> \u4ee5\u4e0b\u306e\u5185\u5bb9\u3067  \/usr\/lib\/cgi-bin\/.htaccess  \u3092\u4f5c\u6210\u3057\u307e\u3059\u3002 <\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>AddDefaultCharset utf-8\nAddType 'application\/json; charset=UTF-8' .json<\/code><\/pre>\n\n\n\n<p> \/usr\/lib\/cgi-bin\/  \u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u4e0a\u306b text2wc.py \u3068\u3044\u3046\u30d5\u30a1\u30a4\u30eb\u540d\u3067\u4ee5\u4e0b\u306e\u30b9\u30af\u30ea\u30d7\u30c8\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>#!\/usr\/bin\/env python3\n# -*- coding: utf-8 -*-\nimport cgi\nimport sys\nimport io\nimport json\nimport MeCab\n\nfrom wordcloud import WordCloud\n\n#print('Content-Type: text\/html;charset=utf-8')\nprint('Content-Type: 
application\/json;charset=utf-8')\nprint('')\n\nsys.path.append('\/home\/ubuntu\/.local\/lib\/python3.8\/site-packages')\n\n#\u30dd\u30b9\u30c8\u3055\u308c\u305f\u30c7\u30fc\u30bf\u3092\u53d6\u5f97\npost_data = cgi.FieldStorage()\n\ntext = post_data.getvalue('text', 'default_value')\nno   = post_data.getvalue('no', '0') #\u30e6\u30cb\u30fc\u30af\u306aID\n\n#\u5b89\u5168\u306e\u305f\u3081\u6574\u6570\u5024\u306b\u5909\u63db\nno = int(no)\n\n#\u9664\u5916\u3059\u308b\u5358\u8a9e\u306e\u30ea\u30b9\u30c8\nstop_words = &#91;\n\t'\u3042','\u3044','\u3046','\u3048','\u304a',\n\t'\u304b','\u304d','\u304f','\u3051','\u3053',\n\t'\u3055','\u3057','\u3059','\u305b','\u305d',\n\t'\u305f','\u3061','\u3064','\u3066','\u3068',\n\t'\u306a','\u306b','\u306c','\u306d','\u306e',\n\t'\u306f','\u3072','\u3075','\u3078','\u307b',\n\t'\u307e','\u307f','\u3080','\u3081','\u3082',\n\t'\u3084','\u3086','\u3088',\n\t'\u3089','\u308a','\u308b','\u308c','\u308d',\n\t'\u308f','\u3092','\u3093',\n\t'\u304c','\u304e','\u3050','\u3052','\u3054',\n\t'\u3056','\u3058','\u305a','\u305c','\u305e',\n\t'\u3060','\u3062','\u3065','\u3067','\u3069',\n\t'\u3060','\u3062','\u3065','\u3067','\u3069',\n\t'\u3059\u308b', '\u3044\u308b', '\u3042\u308b', '\u306a\u3044', '\u304a\u308b',\n\t'\u3082\u306e', '\u3044\u3046', '\u305d\u3046', '\u306a\u308b', '\u898b\u308b',\n\t''\n]\n\n#MeCab \u3092\u4f7f\u7528\u3057\u3066\u5f62\u614b\u7d20\u89e3\u6790\nmecab = MeCab.Tagger(\"-O chasen -d \/var\/lib\/mecab\/dic\/ipadic-utf8\/\")\nnode = mecab.parseToNode(text)\nwords = &#91;]\n\nwhile node:\n    if node.feature.split(\",\")&#91;0] == u\"\u540d\u8a5e\":\n        words.append(node.surface)\n    elif node.feature.split(\",\")&#91;0] == u\"\u5f62\u5bb9\u8a5e\":\n        words.append(node.feature.split(\",\")&#91;6])\n    elif node.feature.split(\",\")&#91;0] == u\"\u52d5\u8a5e\":\n        words.append(node.feature.split(\",\")&#91;6])\n    node = node.next\n\n#\u5358\u8a9e\u3092\u7a7a\u767d\u3067\u7d50\u5408\ntext = ' 
'.join(words);\n\n#\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u3092\u4f5c\u6210\nwordcloud = WordCloud(\n\twidth = 800,  # \u5e45\n\theight = 200,  # \u9ad8\u3055\n\tbackground_color = 'white', # \u80cc\u666f\u8272\n\tfont_path = '\/usr\/share\/fonts\/truetype\/fonts-japanese-gothic.ttf', # \u65e5\u672c\u8a9e\u30d5\u30a9\u30f3\u30c8\u3092\u6307\u5b9a\n#\tmax_font_size = 60, # \u6700\u5927\u30d5\u30a9\u30f3\u30c8\u30b5\u30a4\u30ba\n#\tregexp = r\"&#91;\\w']+\",\n\tstopwords = set(stop_words), # \u51fa\u529b\u304b\u3089\u9664\u5916\u3059\u308b\u5358\u8a9e\n)\n\nfile_name = 'wordcloud_' + str(no) + '.png'\n\nwordcloud.generate(text)\nwordcloud.to_file('\/var\/www\/html\/output\/' + file_name)\n\n#\u51fa\u529b\u3059\u308b\u30c7\u30fc\u30bf\nresponse = {}\nresponse&#91;'file_name'] = file_name\n\n#print('&lt;img src=\"..\/output\/' + file_name + '\"&gt;')\n\n#\u30d5\u30a1\u30a4\u30eb\u540d\u3092\u51fa\u529b\nprint(response)\n<\/code><\/pre>\n\n\n\n<p>\/cgi-bin\/text2wc.py \u306b\u5bfe\u3057\u3066\u3001\u4f5c\u6210\u306e\u5143\u3068\u306a\u308b\u6587\u7ae0\uff08text\uff09\u3068\u30e6\u30cb\u30fc\u30af\u306a\u756a\u53f7\uff08no\uff09\u3092 POST \u3059\u308b\u3068\u3001\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u753b\u50cf\u4f5c\u6210\u5f8c\u306b\u30d5\u30a1\u30a4\u30eb\u540d\u304c\u51fa\u529b\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">{'file_name': 'wordcloud_123456.png'}<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u6700\u8fd1\u3001\u7814\u7a76\u76ee\u7684\u3067\u30ef\u30fc\u30c9\u30af\u30e9\u30a6\u30c9\u4f5c\u6210\u7528\u306e\u7c21\u6613\u7684\u306a WEB-API \u3092\u5b9f\u88c5\u3059\u308b\u6a5f\u4f1a\u304c\u3042\u3063\u305f\u305f\u3081\u3001\u624b\u9806\u3092\u307e\u3068\u3081\u3066\u307f\u307e\u3057\u305f\u3002 \u74b0\u5883\u69cb\u7bc9\uff08Ubuntu \/ MeCab \/ WordCloud\uff09 AWS \u306e EC2 (Ubuntu  
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":1533,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[21],"tags":[],"class_list":["post-1530","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-nlp"],"_links":{"self":[{"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=\/wp\/v2\/posts\/1530","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1530"}],"version-history":[{"count":19,"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=\/wp\/v2\/posts\/1530\/revisions"}],"predecessor-version":[{"id":1579,"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=\/wp\/v2\/posts\/1530\/revisions\/1579"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=\/wp\/v2\/media\/1533"}],"wp:attachment":[{"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1530"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1530"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/tech.at-iroha.jp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1530"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}