Commit a090cdb

Richard Luby <richluby@gmail.com>
2016-12-21 09:34:36
reworked tags to be hierarchical
tags populated and helper functions defined to simplify working with tags
1 parent 2dc3fd8
question.go
@@ -9,10 +9,40 @@ import (
 	"strings"
 )
 
+// buildTagPath recursively places tags[currentTag:] into the global categories
+// slice, appending one Category per tag and linking it to parent by index.
+// It returns the value of parent once currentTag has passed the last tag.
+func buildTagPath(tags []string, parent int, currentTag int) int {
+	if len(tags) <= currentTag { // base case: no tags left to place
+		return parent
+	}
+	if newParent := getNearestParentCategory(tags[currentTag : len(tags)-1]); newParent >= 0 { // search uses the remaining tags with the last one excluded
+		numParents := 0 // number of ancestors above newParent
+		for checkForParent := categories[newParent]; checkForParent.Parent >= 0; checkForParent = categories[checkForParent.Parent] { // follow Parent links until a root (Parent < 0)
+			numParents++
+		}
+		return buildTagPath(tags, newParent, currentTag+numParents) // NOTE(review): numParents is 0 when newParent is a root, so this call repeats with the same currentTag (looks like unbounded recursion - confirm); original note: should increment by numTagsFound
+	}
+	category := Category{}
+	category.Value = tags[currentTag]
+	category.Parent = parent
+	categories = append(categories, category) // the slice receives a copy of category
+	category.Children = append(category.Children, // NOTE(review): changes only the local category, after the copy above was stored, and the index added is the new node's own - confirm intent
+		len(categories)-1)
+	//log.Printf("Node (%s) added for child of %s with tag %s: %+v", categories[parent].FullCategoryPath(), tags[currentTag], tags)
+	return buildTagPath(tags, len(categories)-1, currentTag+1) // the node just appended becomes the parent of the next tag
+}
+
 // assignCategory figures out what category
 // this question is a member of
-func assignCategory(category string, record *Record) {
-
+func assignCategory(file *os.File) int { // returns whatever index buildTagPath yields for the file's path
+	tagPath := strings.Replace(file.Name(), serverConfig.QUESTIONS, "", 1) // remove the first occurrence of serverConfig.QUESTIONS (presumably the questions root directory - confirm)
+	tags := strings.Split(tagPath, string(os.PathSeparator)) // one tag per path element
+	tags[len(tags)-1] = strings.Replace(tags[len(tags)-1], ".csv", "", -1) // strip every ".csv" from the last element
+	if tags[0] == "" { // a leading separator leaves an empty first element after Split
+		tags = tags[1:]
+	}
+	return buildTagPath(tags, -1, 0) // -1: no parent yet; 0: start at the first tag
 }
 
 // parseLine parses a csv line and returns a single record
@@ -29,11 +59,9 @@ func parseLine(line string) error {
 	}
 	record.Question = strings.TrimSpace(tokens[0])
 	record.Answer = strings.TrimSpace(tokens[1])
-	category := strings.TrimSpace(strings.ToLower(tokens[2]))
 	if len(tokens) >= 4 {
 		record.Reference = strings.TrimSpace(tokens[3])
 	}
-	assignCategory(category, &record)
 	recordsArray = append(recordsArray, record)
 	return nil
 }
@@ -47,6 +75,7 @@ func LoadFile(file *os.File) error {
 	if !strings.HasSuffix(file.Name(), ".csv") {
 		return nil
 	}
+	catIndex := assignCategory(file)
 	scanner := bufio.NewScanner(file)
 	for scanner.Scan() {
 		if err := parseLine(scanner.Text()); err != nil {
@@ -61,6 +90,7 @@ func LoadFile(file *os.File) error {
 // loadRecords loads the records into memory according to
 // the supplied configuration for the given file.
 func loadRecords(path string, fileInfo os.FileInfo, err error) error {
+	log.Printf("cats: %+v", categories)
 	file, err := os.Open(path)
 	if err != nil {
 		return err
structures.go
@@ -1,6 +1,10 @@
 package main
 
-import "sync"
+import (
+	"strconv"
+	"strings"
+	"sync"
+)
 
 // API_ROOT defines the root path for the web api interface
 const API_ROOT = "/api"
@@ -104,7 +108,7 @@ type Category struct {
 // category, or the empty string if the PathIndex
 // variable is too large
 func (cat Category) String() string {
-	return cat.Value
+	return cat.FullCategoryPath() + " " + strconv.Itoa(cat.Parent) // full category path, a space, then the parent index in decimal
 }
 
 // uses lazy evaluation and short circuting
@@ -119,6 +123,37 @@ func (cat Category) FullCategoryPath() string {
 	return categories[cat.Parent].FullCategoryPath() + CATEGORY_SEPARATOR + cat.Value
 }
 
+// IsAncestorOfPath returns true if the category's full path is a strictly
+// shorter string prefix of path joined with ":"
+func (cat Category) IsAncestorOfPath(path []string) bool {
+	pathToCheck := strings.Join(path, ":") // NOTE(review): literal ":" here while FullCategoryPath joins with CATEGORY_SEPARATOR - confirm they are the same
+	answer := (len(pathToCheck) > len(cat.FullCategoryPath())) && strings.HasPrefix(pathToCheck, cat.FullCategoryPath()) // strictly longer, so an identical path is not its own ancestor; NOTE(review): plain prefix test, so "a:b" also matches "a:bc" - confirm whether a separator boundary is needed
+	return answer
+}
+
+// getNearestParentCategory returns the index in categories of the
+// entry with the longest full path for which IsAncestorOfPath(tags)
+// holds; on equal lengths the earliest index is kept.
+// a negative value is returned if tags is empty or nothing matches
+func getNearestParentCategory(tags []string) int {
+	if len(tags) <= 0 {
+		return -1
+	}
+	tagFoundIndex := -1 // -1 until a first ancestor is found
+	for i, cat := range categories {
+		if cat.IsAncestorOfPath(tags) {
+			if tagFoundIndex < 0 {
+				tagFoundIndex = i
+			} else { // pick longest path
+				if len(cat.FullCategoryPath()) > len(categories[tagFoundIndex].FullCategoryPath()) { // length compared in bytes of the joined path string
+					tagFoundIndex = i
+				}
+			}
+		}
+	}
+	return tagFoundIndex
+}
+
 // P incrememnts the counter here
 // note: race conditions can totally happen
 func (cSem *CountingSemaphore) P() {
@@ -157,7 +192,6 @@ func (cSem *CountingSemaphore) SetCapacity(cap int) {
 func init() {
 	recordsArray = []Record{}
 	usedRecordsArray = []Record{}
-	categories = []Category{Category{Value: "Cyber",
-		Parent: -1}}
+	categories = []Category{} // starts empty; the pre-seeded "Cyber" root is no longer created here
 	mapLock = &sync.Mutex{}
 }
structures_test.go
@@ -0,0 +1,44 @@
+package main
+
+// tests the structures available to the program
+import "testing"
+
+// TestTreeLinkage checks that getNearestParentCategory finds the nearest strict ancestor of a full path
+func TestTreeLinkage(t *testing.T) {
+	catPath := []string{"Cyber", "linux", "shell", "bash"}
+	for i, str := range catPath { // build one chain: each entry points at the previous index
+		categories = append(categories, Category{Parent: i - 1, // NOTE(review): i-1 is only the correct index if categories is empty on entry - confirm test ordering
+			Value: str})
+	}
+	t.Run("Checking ancestry...", func(t *testing.T) {
+		cat := getNearestParentCategory(catPath)
+		if cat < 0 { // negative means no ancestor was found
+			t.Errorf("Improper parent index return: %d", cat)
+			t.FailNow()
+		}
+		if categories[cat].FullCategoryPath() != "Cyber:linux:shell" { // expected: the path one level above "bash"
+			t.Errorf("Cannot find parent. Recevied: %+v for: %v", categories[cat].FullCategoryPath(),
+				catPath[0:len(catPath)])
+		}
+	})
+}
+
+// TestBuildTree checks that buildTagPath creates one category per tag, in order, starting from an empty tree
+func TestBuildTree(t *testing.T) {
+	categories = []Category{} // reset the global so indices start at 0
+	catPath := []string{"Cyber", "linux", "shell", "bash"}
+	catIndex := buildTagPath(catPath, -1, 0) // -1: no parent; 0: begin with the first tag
+	if catIndex != len(catPath)-1 { // the returned index should be that of the last tag
+		t.Errorf("Catindex wrong: %d", catIndex)
+	}
+	if len(categories) != len(catPath) {
+		t.Errorf("Categories wrong len: %d\t%+v", len(categories), categories)
+		t.FailNow()
+	}
+	for i, cat := range categories { // categories[i] is expected to carry catPath[i]
+		if i >= len(catPath) || cat.Value != catPath[i] {
+			t.Errorf("Improper category: %s for index %d", cat.Value, i)
+			t.FailNow()
+		}
+	}
+}