# Source code for chat.database

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# PEP 8 check with Pylint
"""Manage NLU database based on neo4j graph database.
管理基于neo4j图形数据库的自然语言理解数据库。

'py2neo' is a Python client library for the neo4j graph database.
1. Supports adding, deleting, modifying and querying nodes,
relationships, subgraphs and whole graphs;
2. Supports batch processing;
3. Supports command-line usage;
4. Supports reading data files for batch import and export.
"py2neo"是neo4j图形数据库的python接口包。
1.支持节点,关系,子图,全图的增、删、改、查;
2.支持批处理;
3.支持命令行;
4.支持读取数据文件批量导入及导出。

Available functions:
- All classes and functions: 所有类和函数
"""
import os
import string
from optparse import OptionParser
from py2neo import Graph, Node, Relationship, NodeSelector
from .mytools import read_excel
from .semantic import get_tag

# Added 2017-05-12: directory that contains this module; per the original
# note it is the location of the knowledge-base Excel files.
datapath = os.path.dirname(os.path.realpath(__file__))


[docs]class Database(): """Manage Database. 管理数据库。 It support python command line parameter processing of relational database and graph database. You can view all the features by 'python xxx.py -h'. 支持关系数据库和图形数据库的python命令行参数处理。 可以通过'python xxx.py -h'查看所有功能。 Public attributes: - rdb: Relational database. 关系数据库。 - graph: Graph database. 图数据库。 """ def __init__(self, password="train", userid="userid", is_admin=True): self.is_admin = is_admin self.rdb = None self.graph = Graph("http://localhost:7474/db/data", password=password) self.selector = NodeSelector(self.graph) # DeprecationWarning: Graph.find_one is deprecated, use NodeSelector instead. 2017-5-18 # self.gconfig = self.graph.find_one("User", "userid", userid) # 用法1:subgraph = selector.select("Label", property=value) # 用法2:subgraph = selector.select("Person").where("_.name =~ 'J.*'", "1960 <= _.born < 1970") self.gconfig = self.selector.select("User", userid=userid).first() self.usage = "usage: python %prog [options] arg" self.version = "%prog 1.0" self.parser = OptionParser(usage=self.usage, version=self.version) self.parser.add_option("-v", "--verbose", action="store_true", dest="verbose") self.parser.add_option("-q", "--quiet", action="store_false", dest="verbose") self.parser.add_option("-b", "--batch", dest="batch", action="store_true", \ help="batch processing of graph database") self.parser.add_option("-f", "--file", dest="filename", \ help="read data from filename") self.parser.add_option("-p", "--path", dest="filepath", \ help="read data from filepath") self.parser.add_option("-a", "--add", dest="add", \ help="add subgraph to graph database") self.parser.add_option("-d", "--delete", dest="delete", \ help="delete subgraph of graph database") self.parser.add_option("-e", "--edit", dest="edit", \ help="edit subgraph of graph database") self.parser.add_option("-s", "--search", dest="search", \ help="search subgraph of graph database") (self.options, self.args) = self.parser.parse_args() # if len(self.args) == 
0: # self.parser.error("incorrect number of arguments") if self.options.verbose: print("reading %s..." % self.options.filename) if self.options.delete: for label in self.args: self.delete(pattern=self.options.delete, label=label) def delete(self, pattern="n", label=None): """Batch delete data or subgraph in database. 在数据库中批量删除数据或者子图。 Args: pattern: Type of subgraph. 子图类型。 label: Label of subgraph. 子图标签。 """ if pattern == "all": self.graph.delete_all() elif pattern == "n": self.graph.run("MATCH(n:" + label + ") DETACH DELETE n") elif pattern == "r": self.graph.run("MATCH (n)-[r:" + label + "]-(m) DETACH DELETE r") elif pattern == "nr": self.graph.run("MATCH (n)<-[r:" + label + "]-(m) DETACH DELETE r DELETE n") elif pattern == "rm": self.graph.run("MATCH (n)-[r:" + label + "]->(m) DETACH DELETE r DELETE m") elif pattern == "nrm": self.graph.run("MATCH (n)-[r:" + label + "]-(m) DETACH DELETE r DELETE n DELETE m") def reset(self, pattern="n", label=None, filename=None): """Reset data of label in database. 重置数据库子图。 Args: pattern: Type of subgraph. 子图类型。 label: Label of subgraph. 子图标签。 """ assert filename is not None, "filename can not be None." self.delete(pattern="n", label="NluCell") print("Delete successfully!") if os.path.exists(filename): self.handle_excel(filename) else: print("You can set 'filename=<filepath>' when you call 'Database.reset.'") print("Reset successfully!") def reset_ts(self, pattern="n", label="TestStandard", filename=None): """Reset data of label in database. 重置数据库子图。 Args: pattern: Type of subgraph. 子图类型。 label: Label of subgraph. 子图标签。 """ assert filename is not None, "filename can not be None." 
self.delete(pattern="n", label=label) print("Delete test standard successfully!") if os.path.exists(filename): self.handle_ts(filename) else: print("You can set 'filename=<filepath>' when you call 'Database.reset.'") print("Reset test standard successfully!") def add_qa(self, label="NluCell", name=None, content=None, topic="", \ behavior="", parameter="", url="", tag="", keywords="", api="", txt="", \ img="", chart="", delimiter=None): """ Add qa node in graph. """ assert name is not None, "name must be string." assert content is not None, "content must be string." questions = name.split(delimiter) for question in questions: if question: # 问题不能为空,避免因知识库表格填写格式不对而导致存入空问答对 tag = get_tag(question, self.gconfig) node = Node(label, name=question, content=content, topic=topic, \ behavior=behavior, parameter=parameter, url=url, tag=tag, \ keywords=keywords, api=api, txt=txt, img=img, chart=chart, hot="0") self.graph.create(node) def add_ts(self, label="TestStandard", question=None, content=None, context="", \ behavior="", parameter="", url=""): """ Add test standard node in graph. """ assert question is not None, "question must be string." assert content is not None, "content must be string." for item in question.split(): if item: # 问题不能为空,避免因知识库表格填写格式不对而导致存入空问答对 node = Node(label, question=item, content=content, context=context, \ behavior=behavior, parameter=parameter, url=url) self.graph.create(node) def handle_ts(self, filename=None, custom_sheets=None): """Processing data of test standard. """ assert filename is not None, "filename can not be None." 
data = read_excel(filename) data_sheets = data.sheet_names() if custom_sheets: sheet_names = list(set(data_sheets).intersection(set(custom_sheets))) else: sheet_names = data_sheets for sheet_name in sheet_names: # 可自定义要导入的子表格 table = data.sheet_by_name(sheet_name) # 1.Select specified table # table = data.sheet_by_index(0) if data: # 2.Select specified column col_format = ['A', 'B', 'C', 'D', 'E', 'F'] try: nrows = table.nrows # ncols = table.ncols str_upcase = [i for i in string.ascii_uppercase] i_upcase = range(len(str_upcase)) ncols_dir = dict(zip(str_upcase, i_upcase)) col_index = [ncols_dir.get(i) for i in col_format] # 前两行为表头 for i in range(2, nrows): question = table.cell(i, col_index[0]).value content = table.cell(i, col_index[1]).value context = table.cell(i, col_index[2]).value behavior = table.cell(i, col_index[3]).value parameter = table.cell(i, col_index[4]).value url = table.cell(i, col_index[5]).value self.add_ts(question=question, content=content, context=context, \ behavior=behavior, parameter=parameter, url=url) except Exception as error: print('Error: %s' %error) return None else: print('Error! Data of %s is empty!' %sheet_name) return None def handle_excel(self, filename=None, custom_sheets=None): """Processing data of excel. 
""" assert filename is not None, "filename can not be None" data = read_excel(filename) data_sheets = data.sheet_names() if custom_sheets: sheet_names = list(set(data_sheets).intersection(set(custom_sheets))) else: sheet_names = data_sheets for sheet_name in sheet_names: # 可自定义要导入的子表格 table = data.sheet_by_name(sheet_name) topics = [] # 1.Select specified table # table = data.sheet_by_index(0) if data: # 2.Select specified column col_format = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M'] try: nrows = table.nrows # ncols = table.ncols str_upcase = [i for i in string.ascii_uppercase] i_upcase = range(len(str_upcase)) ncols_dir = dict(zip(str_upcase, i_upcase)) col_index = [ncols_dir.get(i) for i in col_format] # 前两行为表头 for i in range(2, nrows): name = table.cell(i, col_index[0]).value content = table.cell(i, col_index[1]).value # TODO 确定用户可以自定义哪些内容 topic = table.cell(i, col_index[2]).value if self.is_admin else "user_chat" behavior = table.cell(i, col_index[3]).value parameter = table.cell(i, col_index[4]).value url = table.cell(i, col_index[5]).value tag = table.cell(i, col_index[6]).value keywords = table.cell(i, col_index[7]).value api = table.cell(i, col_index[8]).value txt = table.cell(i, col_index[9]).value img = table.cell(i, col_index[10]).value chart = table.cell(i, col_index[11]).value # hot = 0 table.cell(i, col_index[12]).value # 3.Your processing function of excel data here self.add_qa(name=name, content=content, topic=topic, \ behavior=behavior, parameter=parameter, url=url, tag=tag, \ keywords=keywords, api=api, txt=txt, img=img, chart=chart, \ delimiter="|") # 添加到场景标签列表 topics.append(topic) except Exception as error: print('Error: %s' %error) return None else: print('Error! Data of %s is empty!' %sheet_name) return None # Modify in 2017.4.28 # 若子表格名字不存在,新建配置子图,否则只修改topic属性 # DeprecationWarning: Graph.find_one is deprecated, use NodeSelector instead. 
2017-5-18 # config_node = self.graph.find_one("Config", "name", sheet_name) config_node = self.selector.select("Config", name=sheet_name).first() if not config_node: self.graph.run('MATCH (user:User {userid: "' + self.gconfig["userid"] + \ '"})\nCREATE (config:Config {name: "' + sheet_name + '", topic: "' + \ ",".join(set(topics)) + '"})\nCREATE (user)-[:has {bselected: 1, available: 1}]->(config)') else: alltopics = config_node["topic"].split(",") alltopics.extend(topics) config_node["topic"] = ",".join(set(alltopics)) self.graph.push(config_node) def handle_txt(self, filename=None): """ Processing text file to generate subgraph. """ assert filename is not None, "filename can not be None!" with open(filename, encoding="UTF-8") as file: question = file.readline().rstrip() while question: answer = file.readline().rstrip() print("question: " + question) print("answer: " + answer) self.add_qa(name=question, content=answer, delimiter="|") question = file.readline().rstrip() def register_subgraph(self, *, label="Config", name=None, topic=None): """注册子知识库 """ assert name is not None, "Subgraph name can not be None!" assert topic is not None, "Subgraph topic can not be None!" 
subgraph = self.selector.select(label, name=name).first() if subgraph: topics = subgraph["topic"].split(",") topics.extend(topic.split(",")) subgraph["topic"] = ",".join(set(topics)) self.graph.push(subgraph) else: node = Node(label, name=name, topic=topic) self.graph.create(node) def register_user(self, *, label="User", profile=None): """注册用户 """ userid = input("\n欢迎注册!请输入userid: ") while not userid: userid = input("userid不能为空!请输入userid: ") while self.graph.run("MATCH (user:User {userid: '" + userid + "'}) RETURN user").data(): userid = input("用户已存在!请输入新的userid: ") username = input("username: ") robotname = input("robotname: ") robotage = input("robotage: ") robotgender = input("robotgender: ") mother = input("mother: ") father = input("father: ") companyname = input("companyname: ") companytype = input("companytype: ") servicename = input("servicename: ") director = input("director: ") address = input("address: ") province = input("province: ") city = input("city: ") node = Node(label, userid=userid, username=username, robotname=robotname, \ robotage=robotage, robotgender=robotgender, mother=mother, father=father, \ companyname=companyname, companytype=companytype, servicename=servicename, \ director=director, address=address, province=province, city=city) self.graph.create(node) print("注册成功!") # 设置知识库权限 subgraph_names = [item["name"] for item in self.selector.select("Config")] print("可配置知识库列表:", subgraph_names) for name in subgraph_names: self.manage_user(userid=userid, name=name) def manage_user(self, *, userid=None, name=None): """管理用户 """ assert userid is not None, "Userid can not be None!" assert name is not None, "Subgraph name can not be None!" 
user = self.selector.select("User", userid=userid).first() if not user: print("用户不存在,建议您先注册!") return subgraph = self.selector.select("Config", name=name).first() if not subgraph: print("知识库不存在,建议您先注册!") return print("\n待配置知识库:", name) bselected = input("是否选择 [1/0]: ") if not bselected: bselected = "1" available = input("是否可用 [1/0]: ") if not available: available = "1" set_string = "MATCH (user:User {userid: '" + userid + "'}), (subgraph:Config {name: '" \ + name + "'}) CREATE UNIQUE (user)-[r:has]->(subgraph) SET r.bselected=" \ + bselected + ", r.available=" + available self.graph.run(set_string)
# match_string = "MATCH (user:User {userid: '" + userid + \ # "'})-[r:has]->(subgraph:Config {name: '" + name +"'}) RETURN r" # relation = self.graph.run(match_string).data() # if relation: # set # set_string = "MATCH (user:User {userid: '" + userid + \ # "'})-[r:has]->(subgraph:Config {name: '" + name +"'}) SET r.bselected=" + bselected +", r.available=" + available # self.graph.run(set_string) # else: # create # create_string = "MATCH (user:User {userid: '" + userid + \ # "'}), (subgraph:Config {name: '" + name +"'})" + \ # " CREATE UNIQUE (user)-[:has {bselected: " + bselected + \ # ", available: " + available + "}]->(subgraph)" # self.graph.run(create_string)