-
Notifications
You must be signed in to change notification settings - Fork 2
/
paper.bib
99 lines (92 loc) · 6.11 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
% Journal article introducing the Snorkel weak-supervision system (cite key: Snorkel).
% Only the proper noun in the title is brace-protected so styles can still recase the rest.
@article{Snorkel,
  author    = {Ratner, Alexander and Bach, Stephen H. and Ehrenberg, Henry and Fries, Jason and Wu, Sen and R{\'e}, Christopher},
  title     = {{Snorkel}: Rapid Training Data Creation with Weak Supervision},
  journal   = {The {VLDB} Journal},
  volume    = {29},
  number    = {2--3},
  pages     = {709--730},
  year      = {2020},
  publisher = {Springer Berlin Heidelberg},
  issn      = {0949-877X},
  doi       = {10.1007/s00778-019-00552-1},
  url       = {https://doi.org/10.1007/s00778-019-00552-1},
  keywords  = {Machine learning,Training data,Weak supervision,data programming},
  abstract  = {Labeling training data is increasingly the largest bottleneck in deploying machine learning systems. We present Snorkel, a first-of-its-kind system that enables users to train state-of-the-art models without hand labeling any training data. Instead, users write labeling functions that express arbitrary heuristics, which can have unknown accuracies and correlations. Snorkel denoises their outputs without access to ground truth by incorporating the first end-to-end implementation of our recently proposed machine learning paradigm, data programming. We present a flexible interface layer for writing labeling functions based on our experience over the past year collaborating with companies, agencies, and research laboratories. In a user study, subject matter experts build models 2.8$\times$ faster and increase predictive performance an average 45.5\% versus seven hours of hand labeling. We study the modeling trade-offs in this new setting and propose an optimizer for automating trade-off decisions that gives up to 1.8$\times$ speedup per pipeline execution. In two collaborations, with the US Department of Veterans Affairs and the US Food and Drug Administration, and on four open-source text and image data sets representative of other deployments, Snorkel provides 132\% average improvements to predictive performance over prior heuristic approaches and comes within an average 3.60\% of the predictive performance of large hand-curated training sets.}
}
% Software citation for the Label Studio data labeling tool (cite key: LabelStudio).
% The year field holds a single four-digit year; the 2020--2021 development span is kept in the note.
@misc{LabelStudio,
  author = {Tkachenko, Maxim and Malyuk, Mikhail and Shevchenko, Nikita and Holmanyuk, Andrey and Liubimov, Nikolai},
  title  = {{Label Studio}: Data labeling software},
  year   = {2020},
  url    = {https://github.com/heartexlabs/label-studio},
  note   = {Open source software, 2020--2021, available from https://github.com/heartexlabs/label-studio}
}
% Workshop paper on MLflow developments, DEEM'20 (cite key: MLflow).
% The product name is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{MLflow,
  author    = {Chen, Andrew and Chow, Andy and Davidson, Aaron and DCunha, Arjun and Ghodsi, Ali and Hong, Sue Ann and Konwinski, Andy and Mewald, Clemens and Murching, Siddharth and Nykodym, Tomas and Ogilvie, Paul and Parkhe, Mani and Singh, Avesh and Xie, Fen and Zaharia, Matei and Zang, Richard and Zheng, Juntai and Zumar, Corey},
  title     = {Developments in {MLflow}: A System to Accelerate the Machine Learning Lifecycle},
  booktitle = {Proceedings of the Fourth International Workshop on Data Management for End-to-End Machine Learning},
  series    = {DEEM'20},
  year      = {2020},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Portland, OR, USA},
  articleno = {5},
  numpages  = {4},
  isbn      = {9781450380232},
  doi       = {10.1145/3399579.3399867},
  url       = {https://doi.org/10.1145/3399579.3399867},
  abstract  = {MLflow is a popular open source platform for managing ML development, including experiment tracking, reproducibility, and deployment. In this paper, we discuss user feedback collected since MLflow was launched in 2018, as well as three major features we have introduced in response to this feedback: a Model Registry for collaborative model management and review, tools for simplifying ML code instrumentation, and experiment analytics functions for extracting insights from millions of ML experiments.}
}
% Software citation for the FastAPI repository, pinned to a specific commit (cite key: FastAPI).
% The link lives in the url field, matching the other entries in this file; commit is a
% non-standard field that standard styles ignore.
@misc{FastAPI,
  author       = {Ram{\'\i}rez, Sebasti{\'a}n},
  title        = {{FastAPI}},
  year         = {2021},
  publisher    = {GitHub},
  howpublished = {GitHub repository},
  url          = {https://github.com/tiangolo/fastapi},
  commit       = {7b6e198d314e320256c2ed8b62430b2a42c31cb5}
}
% Conference paper on analyzing and visualizing Twitter conversations, CASCON '21 (cite key: Gutierrez2021).
% NOTE(review): the first author is entered with the double surname "Gutierrez Gutierrez" as the
% family name; the printed full name is unchanged, but confirm the split with the authors.
@inproceedings{Gutierrez2021,
  author    = {Gutierrez Gutierrez, Candelario A. and Whittaker, Andrea and Patenio, Katherine Mae and Gehman, Joel and Lefsrud, Lianne M. and Barbosa, Denilson and Stroulia, Eleni},
  title     = {Analyzing and Visualizing {Twitter} Conversations},
  booktitle = {Proceedings of the 31st Annual International Conference on Computer Science and Software Engineering},
  series    = {CASCON '21},
  pages     = {4--13},
  numpages  = {10},
  year      = {2021},
  publisher = {IBM Corp.},
  address   = {USA},
  location  = {Toronto, Canada},
  keywords  = {energy narratives, personal values, academic institutions' brands, humor, social media},
  abstract  = {Social media platforms are public venues where conversations about issues of public interest take place. Much recent research has been devoted to evaluating the degree to which online conversations capture public opinion on issues of broad societal interest. We describe a robust and scalable platform to support such studies. Our platform allows the analysis of three semantic aspects of tweets, namely the personal values, sentiment, and humor expressed in them, as well as the public's engagement with them. In addition, it aggregates these indicators at the level of tweet authors to shed light on the activities and style of influencers of public opinion. Finally, it offers rich visualizations to enable users to gain insights on their datasets. We demonstrate the usefulness of our platform with two case studies: (a) analyzing the fragmented narratives around established (hydro, oil and gas, coal, nuclear) and new (solar, wind, geothermal, biomass) energy sources; and (b) comparing the social-media brands of academic institutions.}
}
% Citation for the proprietary Snorkel Flow platform (cite key: SnorkelFlow).
% The corporate author is double-braced so the comma is not read as a "Last, First" separator.
@misc{SnorkelFlow,
  author = {{Snorkel AI, Inc.}},
  title  = {{Snorkel Flow}},
  year   = {2021},
  url    = {https://snorkel.ai/platform/},
  note   = {Proprietary software available from https://snorkel.ai/platform/}
}
% Citation for the Neu.ro MLOps platform (cite key: Neuro).
@misc{Neuro,
  author = {Neu.ro},
  title  = {{Neu.ro}},
  year   = {2022},
  url    = {https://neu.ro/},
  note   = {MLOps Platform available from https://neu.ro/neuro-mlops/}
}
% Software citation for the MLRun repository, pinned to a specific commit (cite key: MLRun).
% The corporate author is double-braced so the comma is not read as a "Last, First" separator;
% commit is a non-standard field that standard styles ignore.
@misc{MLRun,
  author = {{Iguazio, Ltd.}},
  title  = {{MLRun}},
  year   = {2022},
  url    = {https://github.com/mlrun/mlrun},
  commit = {33ca54c34c40dc48dbe5d9108d690f20c2a83b87}
}