|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import requests |
| 4 | +import xml.etree.ElementTree as ET |
| 5 | +import sys, os |
| 6 | +import re, string |
| 7 | +import configparser |
| 8 | + |
| 9 | +## createClassifier <trainingdata.xml> |
| 10 | + |
## Load config
## The ini file is expected in ../config relative to the directory holding this script.
## abspath() is applied before dirname() because dirname() of a bare script name is '',
## which would otherwise turn the relative path into the root path '/../config/uclassify.ini'.
scriptDir = os.path.dirname(os.path.abspath(sys.argv[0]))
configPath = os.path.join(scriptDir, os.pardir, 'config', 'uclassify.ini')
config = configparser.ConfigParser()
## ConfigParser.read() silently skips files it cannot open and returns the list of
## files it did parse, so an empty result means the config was not loaded at all.
if not config.read(configPath):
    sys.exit('Unable to read config file: ' + configPath)
configBase = config['uclassify.com']
UCLASSIFY_CLASSIFIER = configBase['ClassifierName'] #'Device Category'
UCLASSIFY_WRITE_TOKEN = configBase['WriteToken']
UCLASSIFY_BASE_URL = configBase['serviceURL'] + configBase['User']

## Every uClassify request in this script is guarded by this flag
USE_API = True
| 20 | + |
## The XML training file must be passed as the first command-line argument
if len(sys.argv) >= 2:
    datasource = sys.argv[1]
else:
    ## No file supplied - nothing to train from, so stop with a usage message
    sys.exit('Usage: createClassifier <xml training data>')
| 27 | + |
## Matches every run of characters that is neither whitespace nor alphanumeric.
## \w also matches '_', so underscores are listed explicitly to have them removed too.
_NON_ALNUM_RE = re.compile(r'([^\s\w]|_)+')


def _cleanPhrase(text):
    """Return text keeping only spaces and alphanumeric characters.

    Runs of whitespace are collapsed to a single space and leading/trailing
    whitespace is dropped. None (an XML element without text) yields ''.
    """
    return " ".join(_NON_ALNUM_RE.sub('', text or '').split())


def _fieldText(rowElem, fieldName):
    """Return the cleaned text of the first <field name="fieldName"> child of rowElem.

    Returns '' when the row has no such field.
    """
    fieldElem = rowElem.find('field[@name="' + fieldName + '"]')
    ## Compare against None explicitly: an Element without children is falsy
    if fieldElem is None:
        return ''
    return _cleanPhrase(fieldElem.text)


e = ET.parse(datasource).getroot()
## Construct a dictionary of training phrases keyed by category name
trainingData = {}
for rowElem in e.iter('row'):
    cleanText = _fieldText(rowElem, 'device.text')
    categoryName = _fieldText(rowElem, 'category.name')
    if not cleanText or not categoryName:
        ## A row lacking either a phrase or a category cannot be used for training;
        ## report it instead of failing with IndexError/TypeError on the whole file.
        print('Skipping row with missing device.text or category.name', file=sys.stderr)
        continue
    trainingData.setdefault(categoryName, []).append(cleanText)

print('Categories: ' + str(trainingData.keys()))
| 42 | + |
if USE_API:
    authHeaders = {'Authorization': 'Token ' + UCLASSIFY_WRITE_TOKEN}
    classifierUrl = UCLASSIFY_BASE_URL + '/' + UCLASSIFY_CLASSIFIER

    ## Drop any classifier left over from a previous run.
    ## The status code is only printed; a failed delete is not treated as fatal.
    response = requests.delete(classifierUrl, headers=authHeaders, json={})
    print(response.status_code)

    ## Create the classifier
    createPayload = {'classifierName': UCLASSIFY_CLASSIFIER}
    response = requests.post(UCLASSIFY_BASE_URL, headers=authHeaders, json=createPayload)
    print('Create classifier "' + UCLASSIFY_CLASSIFIER + '" : response=' + str(response.status_code))
    ## Creation is expected to succeed every time, so any HTTP error is fatal
    response.raise_for_status()
    print(response.text)
| 57 | + |
## Register one uClassify class per device category found in the training data
for categoryName in trainingData:
    if not USE_API:
        ## API disabled - nothing to register for this category
        continue
    addClassUrl = UCLASSIFY_BASE_URL + '/' + UCLASSIFY_CLASSIFIER + '/addClass'
    response = requests.post(
        addClassUrl,
        headers={'Authorization': 'Token ' + UCLASSIFY_WRITE_TOKEN},
        json={'className': categoryName},
    )
    print('Create class "' + categoryName +'" : response=' + str(response.status_code))
    ## A class that could not be created is fatal
    response.raise_for_status()
| 66 | + |
if USE_API:
    ## Upload the training phrases of every class.
    ## Every class is attempted even if an earlier one fails; the failures are
    ## collected so the script can finish with a non-zero exit status instead of
    ## silently reporting success after an HTTP error.
    failedClasses = []
    for className, texts in trainingData.items():
        print('Training class: "' + className + '"')
        print('Training data : "' + str(texts) + '"')
        response = requests.post(
            UCLASSIFY_BASE_URL + '/' + UCLASSIFY_CLASSIFIER + '/' + className + '/train',
            headers={'Authorization': 'Token ' + UCLASSIFY_WRITE_TOKEN},
            json={'texts': texts},
        )
        print(response.status_code)
        print(response.text)
        ## Response.ok is False for any 4xx/5xx status code
        if not response.ok:
            failedClasses.append(className)

    if failedClasses:
        sys.exit('Training failed for: ' + ', '.join(failedClasses))
0 commit comments