// This program streams a gzip-compressed XML file made of <node>, <way> and
// <relation> elements (presumably an OpenStreetMap export) and counts, for
// every <way> carrying a "highway" tag, how often each "name" tag value occurs.
//
// Progress statistics are written to stderr; the resulting "name<TAB>count"
// lines are written to stdout, sorted by name.
package main

import (
	"bufio"
	"compress/gzip"
	"fmt"
	"os"
	"sort"
	"time"

	xmlparser "github.com/tamerh/xml-stream-parser" // Install with "go get -u github.com/tamerh/xml-stream-parser"
)

const (
	// readBufferSize is the size in bytes of the buffer placed between the
	// gzip stream and the XML parser.
	readBufferSize = 1024 * 1024 * 256

	// statInterval is the number of parsed elements between progress reports.
	statInterval = 10000
)

// getTag searches the <tag> children of el for one whose "k" attribute equals
// name. It returns that tag's "v" attribute and true, or "" and false when no
// such tag is present.
func getTag(el *xmlparser.XMLElement, name string) (string, bool) {
	for _, tag := range el.Childs["tag"] {
		if tag.Attrs["k"] == name {
			return tag.Attrs["v"], true
		}
	}
	return "", false
}

// hasTag reports whether el has a <tag> child whose "k" attribute equals name.
// The tag's value is not inspected.
func hasTag(el *xmlparser.XMLElement, name string) bool {
	_, found := getTag(el, name)
	return found
}

// die writes the formatted message to stderr and terminates the process with
// the given exit code. Because it calls os.Exit, deferred functions of the
// caller do not run.
func die(exitcode int, format string, a ...interface{}) {
	fmt.Fprintf(os.Stderr, format, a...)
	os.Exit(exitcode)
}

// main expects exactly one argument: the path of the gzip-compressed XML file.
func main() {
	if len(os.Args) != 2 {
		die(1, "Usage: %s <file.xml.gz>\n", os.Args[0])
	}

	// 1. Open the input file.
	file, err := os.Open(os.Args[1])
	if err != nil {
		die(1, "Cannot open file: %v\n", err)
	}
	defer file.Close()

	// 2. Gzip stream reader.
	gzipReader, err := gzip.NewReader(file)
	if err != nil {
		die(1, "Cannot construct gzip stream reader: %v\n", err)
	}
	defer gzipReader.Close()

	// 3. Streaming XML parser that only emits node, way and relation elements.
	buffer := bufio.NewReaderSize(gzipReader, readBufferSize)
	parser := xmlparser.NewXMLParser(buffer, "node", "way", "relation")

	streets := map[string]int{}
	nodes, ways, relations := 0, 0, 0
	start := time.Now()

	// printStat writes a one-line progress report to stderr. The line ends
	// with "\r" so that the next report overwrites it in a terminal.
	printStat := func() {
		megabytes := float64(parser.TotalReadSize) / 1024 / 1024
		elapsed := time.Since(start)
		fmt.Fprintf(os.Stderr,
			"Elapsed: %-20s Read: %10.2fMB (%.2fMB/s)\tNodes: %8d\tWays: %8d\tRelations: %8d\tFound streets: %8d\r",
			elapsed, megabytes, megabytes/elapsed.Seconds(),
			nodes, ways, relations, len(streets),
		)
	}

	// 4. Read the document element by element.
	for xml := range parser.Stream() {
		// The parser reports failures through the Err field of a streamed
		// element; abort instead of silently printing partial counts.
		if xml.Err != nil {
			die(1, "\nCannot parse XML: %v\n", xml.Err)
		}

		switch xml.Name {
		case "node":
			nodes++
		case "way":
			ways++
			// Only ways tagged as highways contribute to the street counts.
			if hasTag(xml, "highway") {
				if name, found := getTag(xml, "name"); found {
					streets[name]++
				}
			}
		case "relation":
			relations++
		}

		// Print the statistics every statInterval parsed elements.
		if (nodes+ways+relations)%statInterval == 0 {
			printStat()
		}
	}

	printStat()
	fmt.Fprintf(os.Stderr, "\n\nTotal time: %v\nTotal unique streets: %d\n", time.Since(start), len(streets))

	// Map iteration order is random, so sort the names to make the output
	// deterministic.
	names := make([]string, 0, len(streets))
	for name := range streets {
		names = append(names, name)
	}
	sort.Strings(names)

	// Buffer stdout so that each street does not cost a separate write; any
	// write error surfaces when the buffer is flushed.
	out := bufio.NewWriter(os.Stdout)
	for _, name := range names {
		fmt.Fprintf(out, "%s\t%d\n", name, streets[name])
	}
	if err := out.Flush(); err != nil {
		die(1, "Cannot write results: %v\n", err)
	}
}