-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_value.h
153 lines (139 loc) · 3.45 KB
/
parse_value.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
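/*
Parses a text file laid out as follows:
key1, val1 val2
key2, val1 val2 val3
....
Rules:
1. A comma separates a key from its values.
2. Spaces separate the individual values.
3. When a key that is already present shows up again, a new map is started;
   the finished maps are collected in a vector.
*/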
#ifndef ZZ_SPLIT_FILE_TO_MAP
#define ZZ_SPLIT_FILE_TO_MAP
#include <map>
#include <string>
#include <fstream>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <strings.h>
#include <vector>
#include <stdint.h>
#include <boost/utility/string_ref.hpp>
#include <sstream>
#include "timer.h"
using namespace std;
// Tag describing how the values of one record are to be interpreted.
// The numeric codes start at 1 and increase by one per enumerator.
enum Type {
    Double  = 1,  // floating-point values
    Int     = 2,  // integer values
    String  = 3,  // textual values
    Unknown = 4   // kind not determined
};
// Storage for a single parsed value; this union also reflects the data format.
// The union does not record which member is meaningful: the code in this file
// reads/writes `f` when the owning Elem's type_ is Type::Double and `i`
// otherwise.
union Val {
// Floating-point payload. NOTE(review): this is single-precision `float`
// even though the matching enumerator is named Double - confirm intended.
float f;
// Integer payload.
int64_t i;
};
// Writes a diagnostic message, followed by a newline and a flush, to
// standard output.
//
// p: NUL-terminated message text. A null pointer is reported as "(null)"
//    rather than being streamed directly, because inserting a null
//    `const char*` into an ostream is invalid and typically puts std::cout
//    into a failed state that swallows all later output.
//
// Declared `inline` because this definition lives in a header; without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline void Error(const char *p) {
    std::cout << (p ? p : "(null)") << std::endl;
}
//每一个value的格式
class Elem {
public:
Type type_;
vector<Val> vec_;
};
// Reads a text file and appends each of its lines (without the trailing
// '\n') to `res`. Each line is later treated as one "key,value..." record.
//
// file: path of the file to read.
// res:  output; lines are appended, existing content is kept.
//
// Behaviour notes:
//  * If the file cannot be opened, a message is written to std::cerr and
//    `res` is left untouched. (Looping on `!eof()` would never terminate in
//    that case, because a stream that failed to open never reaches EOF.)
//  * Lines may be arbitrarily long; std::getline grows the string as needed,
//    so there is no fixed-size buffer to overflow or to stall on.
//  * A final newline does not produce an extra empty record. Blank lines in
//    the middle of the file are still reported as empty strings.
//
// Declared `inline` because the definition lives in a header.
inline void SplitToLine(const std::string& file, std::vector<std::string> &res) {
    std::ifstream ifstr(file.c_str());
    if (!ifstr.is_open()) {
        std::cerr << "SplitToLine: cannot open " << file << std::endl;
        return;
    }
    std::string line;
    // Testing the result of getline itself ends the loop on EOF *and* on any
    // read error. The stream is closed by its destructor.
    while (std::getline(ifstr, line)) {
        res.push_back(line);
    }
}
//将键值对的string做成string类型的key和string类型的value
void SplitStrsToKV(const vector<string> &strs, vector<pair<boost::string_ref, boost::string_ref> > &pairs, const string& spec) {
for (auto& i : strs) {
size_t pos = i.find(spec);
if (string::npos == pos) {
pos = i.size();
}
pairs.push_back(pair<boost::string_ref, boost::string_ref>(boost::string_ref(i.c_str(), pos), boost::string_ref(i.c_str() + pos + 1, i.size() - pos - 1)));
if (string::npos == pos) {
break;
}
}
}
//debug时候能用到
void ShowMap(const map<string, Elem> &m) {
for (auto &i : m) {
std::cout << i.first << "|";
if (i.second.type_ == Type::Double) {
for (auto &j : i.second.vec_) {
std::cout << j.f << " ";
}
}
else {
for (auto &j : i.second.vec_) {
std::cout << j.i << " ";
}
}
std::cout << endl;
}
}
// Splits the character range [p, p + len) at every `spec` character, parses
// each piece as a number and appends it to elemIn.vec_.
//
// T:      unused; kept so existing calls such as SplitBy<double>(...) and
//         SplitBy<int64_t>(...) continue to compile.
// p:      start of the text to split; a null pointer is treated as empty.
// len:    number of characters to examine. The text does NOT need to be
//         NUL-terminated: the search is bounded by `len` (memchr), so nothing
//         beyond p + len is ever read.
// spec:   the separator character.
// elemIn: elemIn.type_ must already be set; it selects the parse target
//         (Val::f for Type::Double, Val::i for everything else).
//
// Empty pieces - produced by a leading, trailing or repeated separator - are
// skipped. (They used to be parsed anyway, which appended a spurious 0, e.g.
// " 1 2" gave {0, 1, 2}.) A non-empty piece that is not numeric is stored
// as 0.
template<class T>
void SplitBy(const char*p, const size_t len, const char spec, Elem &elemIn) {
    if (!p) {
        return;
    }
    const char* const stop = p + len;
    const char* next = p;
    while (next < stop) {
        const void* hit = memchr(next, spec, static_cast<size_t>(stop - next));
        const char* end = hit ? static_cast<const char*>(hit) : stop;
        if (end != next) {
            const std::string token(next, static_cast<size_t>(end - next));
            std::istringstream iss(token);
            Val v;
            v.i = 0;  // defined contents for all bytes, whatever happens below
            if (elemIn.type_ == Type::Double) {
                iss >> v.f;
            }
            else {
                iss >> v.i;
            }
            elemIn.vec_.push_back(v);
        }
        if (end == stop) {
            break;
        }
        next = end + 1;  // step over the separator
    }
}
//吐出多个map,遇到第一个相同key的时候分隔。
//总接口,用户需要改变。
void GetMaps(vector<map<string, Elem> > &ms) {
vector<string> res;
//将文件按行做成string,放入res内
SplitToLine("../values_rel.txt", res);
vector<pair<boost::string_ref, boost::string_ref> > kvpairs;
//将按行的string,以逗号分隔做成kv string
SplitStrsToKV(res, kvpairs, ",");
map<string, Elem> m;
for (auto& kv : kvpairs) {
auto ret = m.insert(pair<string, Elem>(string(kv.first), Elem()));
if (!ret.second) {
ms.push_back(m);
m.clear();
m.insert(pair<string, Elem>(string(kv.first), Elem()));
}
auto iter = m.find(string(kv.first));
if (string((kv.first)) == "dense") {
iter->second.type_ = Type::Double;
//将value的字符串转化为vector
SplitBy<double>(kv.second.data(), kv.second.size(), ' ', iter->second);
}
else {
iter->second.type_ = Type::Int;
SplitBy<int64_t>(kv.second.data(), kv.second.size(), ' ', iter->second);
}
}
ms.push_back(m);
//ShowMap(m);
}
#endif