Skip to content

Commit 72c0ba4

Browse files
committed
Code cleanup part #1
1 parent f76e310 commit 72c0ba4

File tree

3 files changed

+83
-82
lines changed

3 files changed

+83
-82
lines changed

sql/sql_statistics.cc

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,14 +1179,17 @@ class Column_stat: public Stat_table
11791179
table_field->read_stats->set_avg_frequency(stat_field->val_real());
11801180
break;
11811181
case COLUMN_STAT_HIST_SIZE:
1182-
//TODO: ignore this. The size is a part of histogram!
1183-
//table_field->read_stats->histogram.set_size(stat_field->val_int());
1182+
/*
1183+
Ignore the contents of mysql.column_stats.hist_size. We take the
1184+
size from the mysql.column_stats.histogram column itself.
1185+
*/
11841186
break;
11851187
case COLUMN_STAT_HIST_TYPE:
1186-
// TODO: save this next to histogram.
1187-
// For some reason, the histogram itself is read in
1188-
// read_histograms_for_table
11891188
{
1189+
/*
1190+
Save the histogram type. The histogram itself will be read in
1191+
read_histograms_for_table().
1192+
*/
11901193
Histogram_type hist_type= (Histogram_type) (stat_field->val_int() -
11911194
1);
11921195
table_field->read_stats->histogram_type_on_disk= hist_type;
@@ -1247,29 +1250,32 @@ class Column_stat: public Stat_table
12471250
table_field->read_stats->histogram_= hist;
12481251
return hist;
12491252
}
1250-
//memcpy(table_field->read_stats->histogram_.get_values(),
1251-
// val.ptr(), table_field->read_stats->histogram.get_size());
12521253
}
12531254
return NULL;
12541255
}
12551256
};
12561257

1257-
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg)
1258+
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *,
1259+
Histogram_type type_arg,
1260+
const uchar *ptr_arg, uint size_arg)
12581261
{
12591262
// Just copy the data
12601263
size = (uint8) size_arg;
12611264
type = type_arg;
1262-
values = (uchar*)alloc_root(mem_root, size_arg);
1263-
memcpy(values, ptr_arg, size_arg);
1264-
return false;
1265+
if ((values = (uchar*)alloc_root(mem_root, size_arg)))
1266+
{
1267+
memcpy(values, ptr_arg, size_arg);
1268+
return false;
1269+
}
1270+
return true;
12651271
}
12661272

12671273
/*
12681274
Save the histogram data into a table field.
12691275
*/
12701276
void Histogram_binary::serialize(Field *field)
12711277
{
1272-
field->store((char*)get_values(), get_size(), &my_charset_bin);
1278+
field->store((char*)values, size, &my_charset_bin);
12731279
}
12741280

12751281
void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
@@ -1282,20 +1288,32 @@ void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
12821288
}
12831289

12841290

1285-
void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size_arg)
1291+
void Histogram_json::init_for_collection(MEM_ROOT *mem_root,
1292+
Histogram_type htype_arg,
1293+
ulonglong size_arg)
12861294
{
12871295
type= htype_arg;
1288-
values = (uchar*)alloc_root(mem_root, size_arg);
1289-
size = (uint8) size_arg;
1296+
//values_ = (uchar*)alloc_root(mem_root, size_arg);
1297+
size= (uint8) size_arg;
12901298
}
12911299

1292-
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg)
1300+
1301+
/*
1302+
@brief
1303+
Parse the histogram from its on-disk representation
1304+
1305+
*/
1306+
1307+
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field,
1308+
Histogram_type type_arg, const uchar *ptr,
1309+
uint size_arg)
12931310
{
12941311
DBUG_ENTER("Histogram_json::parse");
12951312
size = (uint8) size_arg;
12961313
type = type_arg;
12971314
const char *json = (char *)ptr;
12981315
int vt;
1316+
std::vector<std::string> hist_buckets_text;
12991317
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text);
13001318
if (!result)
13011319
{
@@ -1482,6 +1500,8 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
14821500
}
14831501
return sel;
14841502
}
1503+
1504+
14851505
/*
14861506
@param field The table field histogram is for. We don't care about the
14871507
field's current value, we only need its virtual functions to
@@ -1492,14 +1512,13 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
14921512
double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
14931513
key_range *max_endp)
14941514
{
1495-
//fprintf(stderr, "Histogram_json::range_selectivity\n");
14961515
double min = 0.0, max = 1.0;
14971516
double width = 1.0/(int)histogram_bounds.size();
14981517
if (min_endp)
14991518
{
15001519
double min_sel = 0.0;
15011520
const uchar *min_key= min_endp->key;
1502-
// TODO: also, properly handle SQL NULLs.
1521+
// GSOC-TODO: properly handle SQL NULLs.
15031522
// in this test patch, we just assume the values are not SQL NULLs.
15041523
if (field->real_maybe_null())
15051524
min_key++;
@@ -1573,8 +1592,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
15731592

15741593
void Histogram_json::serialize(Field *field)
15751594
{
1576-
field->store((char*)get_values(), strlen((char*)get_values()),
1577-
&my_charset_bin);
1595+
field->store((char*)json_text, strlen((char*)json_text), &my_charset_bin);
15781596
}
15791597

15801598
int Histogram_json::find_bucket(Field *field, const uchar *endpoint)
@@ -1583,7 +1601,7 @@ int Histogram_json::find_bucket(Field *field, const uchar *endpoint)
15831601
int high = (int)histogram_bounds.size()-1;
15841602
int mid;
15851603
int min_bucket_index = -1;
1586-
std::string mid_val;
1604+
std::string mid_val; // GSOC-todo: don't copy strings
15871605

15881606
while(low <= high) {
15891607
// C++ gives us the floor of integer divisions by default; below we get the ceiling (round-up).
@@ -2037,9 +2055,9 @@ class Histogram_builder_json : public Histogram_builder
20372055
writer->add_str(value.c_str());
20382056
}
20392057
writer->end_array();
2040-
histogram->set_size(bucket_bounds.size());
20412058
Binary_string *json_string = (Binary_string *) writer->output.get_string();
2042-
((Histogram_json *)histogram)->set_values((uchar *) json_string->c_ptr());
2059+
Histogram_json *hist= (Histogram_json*)histogram;
2060+
hist->set_json_text(bucket_bounds.size(), (uchar *) json_string->c_ptr());
20432061
}
20442062
};
20452063

@@ -2207,6 +2225,7 @@ class Count_distinct_field: public Sql_alloc
22072225
*/
22082226
void walk_tree_with_histogram(ha_rows rows)
22092227
{
2228+
// GSOC-TODO: is the difference below meaningful?
22102229
if (table_field->collected_stats->histogram_->get_type() == JSON_HB)
22112230
{
22122231
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
@@ -2680,11 +2699,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
26802699
if (bitmap_is_set(table->read_set, (*field_ptr)->field_index))
26812700
{
26822701
column_stats->histogram_ = NULL;
2683-
/*
2684-
column_stats->histogram.set_size(hist_size);
2685-
column_stats->histogram.set_type(hist_type);
2686-
column_stats->histogram.set_values(histogram);
2687-
histogram+= hist_size;*/
26882702
(*field_ptr)->collected_stats= column_stats++;
26892703
}
26902704
}
@@ -2950,9 +2964,9 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
29502964
}
29512965
if (count_distinct)
29522966
{
2953-
//uint hist_size= count_distinct->get_hist_size();
29542967
uint hist_size= current_thd->variables.histogram_size;
2955-
Histogram_type hist_type= (Histogram_type) (current_thd->variables.histogram_type);
2968+
Histogram_type hist_type=
2969+
(Histogram_type) (current_thd->variables.histogram_type);
29562970
bool have_histogram= false;
29572971
if (hist_size != 0 && hist_type != INVALID_HISTOGRAM)
29582972
{
@@ -3001,12 +3015,11 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
30013015
}
30023016
else
30033017
have_histogram= false ; // TODO: need this?
3004-
//histogram.set_size(hist_size);
3018+
30053019
set_not_null(COLUMN_STAT_HIST_SIZE);
30063020
if (have_histogram && distincts)
30073021
{
30083022
set_not_null(COLUMN_STAT_HIST_TYPE);
3009-
//histogram.set_values(count_distinct->get_histogram());
30103023
histogram_= count_distinct->get_histogram();
30113024
set_not_null(COLUMN_STAT_HISTOGRAM);
30123025
}

sql/sql_statistics.h

Lines changed: 36 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -157,22 +157,17 @@ class Histogram_base : public Sql_alloc
157157

158158
virtual uint get_width()=0;
159159

160-
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size)=0;
160+
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
161+
ulonglong size)=0;
161162

162163
virtual bool is_available()=0;
163164

164165
virtual bool is_usable(THD *thd)=0;
165166

166-
virtual void set_values(uchar * values)=0;
167-
168-
virtual uchar *get_values()=0;
169-
170-
virtual void set_size(ulonglong sz)=0;
171-
172-
virtual double point_selectivity(Field *field, key_range *endpoint, double avg_selection)=0;
173-
167+
virtual double point_selectivity(Field *field, key_range *endpoint,
168+
double avg_selection)=0;
174169
virtual double range_selectivity(Field *field, key_range *min_endp,
175-
key_range *max_endp)=0;
170+
key_range *max_endp)=0;
176171

177172
// Legacy: return the size of the histogram on disk.
178173
// This will be stored in mysql.column_stats.hist_size column.
@@ -181,6 +176,11 @@ class Histogram_base : public Sql_alloc
181176
virtual ~Histogram_base()= default;
182177
};
183178

179+
180+
/*
181+
A Height-balanced histogram that stores numeric fractions
182+
*/
183+
184184
class Histogram_binary : public Histogram_base
185185
{
186186
public:
@@ -274,17 +274,12 @@ class Histogram_binary : public Histogram_base
274274
return i;
275275
}
276276

277-
uchar *get_values() override { return (uchar *) values; }
278277
public:
279278
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override;
280279

281-
// Note: these two are used only for saving the JSON text:
282-
void set_values (uchar *vals) override { values= (uchar *) vals; }
283-
void set_size (ulonglong sz) override { size= (uint8) sz; }
284-
285280
uint get_size() override {return (uint)size;}
286281

287-
bool is_available() override { return get_size() > 0 && get_values(); }
282+
bool is_available() override { return get_size() > 0 && (values!=NULL); }
288283

289284
/*
290285
This function checks that histograms should be usable only when
@@ -328,58 +323,57 @@ class Histogram_binary : public Histogram_base
328323
}
329324

330325
double range_selectivity(Field *field, key_range *min_endp,
331-
key_range *max_endp) override;
332-
326+
key_range *max_endp) override;
327+
333328
/*
334329
Estimate selectivity of "col=const" using a histogram
335330
*/
336-
double point_selectivity(Field *field, key_range *endpoint, double avg_sel) override;
331+
double point_selectivity(Field *field, key_range *endpoint,
332+
double avg_sel) override;
337333
};
338334

335+
336+
/*
337+
An equi-height histogram which stores real values for bucket bounds.
338+
*/
339+
339340
class Histogram_json : public Histogram_base
340341
{
341342
private:
342343
Histogram_type type;
343344
uint8 size; /* Number of elements in the histogram*/
344-
345-
/*
346-
GSOC-TODO: This is used for storing collected JSON text. Rename it
347-
accordingly.
348-
*/
349-
uchar *values;
350-
351-
// List of values in string form.
352-
/*
353-
GSOC-TODO: We don't need to save this. It can be a local variable in
354-
parse().
355-
Eventually we should get rid of this at all, as we can convert the
356-
endpoints and add them to histogram_bounds as soon as we've read them.
357-
*/
358-
std::vector<std::string> hist_buckets_text;
359345

346+
/* Collection-time only: collected histogram in the JSON form. */
347+
uchar *json_text;
348+
360349
// Array of histogram bucket endpoints in KeyTupleFormat.
361350
std::vector<std::string> histogram_bounds;
362351

363352
public:
364353
bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
365-
const uchar *ptr, uint size) override;
354+
const uchar *ptr, uint size) override;
366355

367356
void serialize(Field *field) override;
368357

369358
// returns number of buckets in the histogram
370359
uint get_width() override
371360
{
372-
return size;
373-
};
361+
return size;
362+
}
374363

375364
Histogram_type get_type() override
376365
{
377366
return JSON_HB;
378367
}
379368

380-
void set_size (ulonglong sz) override {size = (uint8) sz; }
369+
void set_json_text(ulonglong sz, uchar *json_text_arg)
370+
{
371+
size = (uint8) sz;
372+
json_text= json_text_arg;
373+
}
381374

382-
uint get_size() override {
375+
uint get_size() override
376+
{
383377
return size;
384378
}
385379

@@ -393,15 +387,10 @@ class Histogram_json : public Histogram_base
393387
is_available();
394388
}
395389

396-
void set_values (uchar *vals) override { values= (uchar *) vals; }
397-
398-
uchar *get_values() override { return (uchar *) values; }
399-
400-
double point_selectivity(Field *field, key_range *endpoint, double avg_selection) override;
401-
390+
double point_selectivity(Field *field, key_range *endpoint,
391+
double avg_selection) override;
402392
double range_selectivity(Field *field, key_range *min_endp,
403-
key_range *max_endp) override;
404-
393+
key_range *max_endp) override;
405394
/*
406395
* Returns the index of the biggest histogram value that is smaller than endpoint
407396
*/

strings/json_lib.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1869,7 +1869,7 @@ int json_path_compare(const json_path_t *a, const json_path_t *b,
18691869

18701870

18711871
enum json_types json_smart_read_value(json_engine_t *je,
1872-
const char **value, int *value_len)
1872+
const char **value, int *value_len)
18731873
{
18741874
if (json_read_value(je))
18751875
goto err_return;
@@ -1952,6 +1952,7 @@ enum json_types json_get_array_item(const char *js, const char *js_end,
19521952
return JSV_BAD_JSON;
19531953
}
19541954

1955+
19551956
/** Simple json lookup for a value by the key.
19561957
19571958
Expects JSON object.
@@ -2027,8 +2028,6 @@ enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
20272028
return JSV_NOTHING;
20282029
}
20292030

2030-
2031-
20322031
/** Check if json is valid (well-formed)
20332032
20342033
@retval 0 - success, json is well-formed

0 commit comments

Comments
 (0)