use crate::*;
use std::str::FromStr;
impl FromStr for Cgats {
type Err = BoxErr;
fn from_str(s: &str) -> Result<Self> {
let mut cgats = Cgats::new();
let mut lines = s.lines()
.flat_map(|line| line.split('\r'))
.map(|line| line.trim_end());
cgats.vendor = lines.next().ok_or("NO DATA")?.parse()?;
let mut push = Push::MetaData;
let (mut hit_format, mut hit_data) = (false, false);
for next in lines {
match next.trim() {
"BEGIN_DATA_FORMAT" => {
push = Push::DataFormat;
hit_format = true;
continue;
}
"END_DATA_FORMAT" => {
push = Push::MetaData;
cgats.data = Vec::with_capacity(cgats.len());
continue;
}
"BEGIN_DATA" => {
push = Push::Data;
hit_data = true;
continue;
}
"END_DATA" => {
push = Push::Stop;
continue;
}
_ => (),
}
match &push {
Push::MetaData => cgats.metadata.push(next.parse()?),
Push::DataFormat => cgats.data_format = next.parse()?,
Push::Data => {
for val in next.split_whitespace() {
cgats.data.push(val.parse()?);
}
}
Push::Stop => (),
}
}
if cgats.vendor == Vendor::ColorBurst {
cgats.data_format = DataFormat::colorburst();
}
if let Some(index) = cgats.index_by_field(&SAMPLE_ID) {
let id_column = cgats.get_col_mut(index);
for id in id_column {
*id = id.to_int().unwrap_or_else(|_| id.to_owned());
}
}
if !hit_format && cgats.vendor != Vendor::ColorBurst {
return err!("DATA_FORMAT tag not found");
}
if !hit_data {
return err!("BEGIN_DATA tag not found");
}
if cgats.data_format.is_empty() && !cgats.is_empty() {
return err!("DATA exists, but DATA_FORMAT is empty");
}
if cgats.n_rows() * cgats.n_cols() != cgats.len() {
return err!("rows * cols != len");
}
Ok(cgats)
}
}
/// Where the `Cgats` parser sends the line it is currently reading.
///
/// The parser switches between these states when it meets the
/// `BEGIN_DATA_FORMAT`, `END_DATA_FORMAT`, `BEGIN_DATA` and `END_DATA` tags.
#[derive(Debug)]
enum Push {
/// Lines are parsed as metadata entries (the initial state).
MetaData,
/// Lines are parsed as the data format.
DataFormat,
/// Whitespace-separated tokens are parsed as data points.
Data,
/// Lines are ignored.
Stop,
}
impl FromStr for MetaData {
    type Err = BoxErr;

    /// Parse one metadata line.
    ///
    /// * A line that is empty after trimming becomes [`MetaData::Blank`].
    /// * A line whose first non-whitespace character is `#` becomes
    ///   [`MetaData::Comment`] holding the line exactly as given.
    /// * Any other line becomes [`MetaData::KeyVal`]: the first
    ///   whitespace-separated token is the key, and the remaining tokens,
    ///   joined by single spaces, are the value.
    ///
    /// # Errors
    ///
    /// Returns an error if no key token can be found.
    fn from_str(s: &str) -> Result<Self> {
        let trimmed = s.trim();

        if trimmed.is_empty() {
            return Ok(MetaData::Blank);
        }

        if trimmed.starts_with('#') {
            // Comments keep the caller's original text, untrimmed.
            return Ok(MetaData::Comment(s.to_owned()));
        }

        let mut tokens = trimmed.split_whitespace();
        let key = tokens.next().ok_or("MetaData key not found")?.into();
        // Collecting the tokens straight into a string would glue them
        // together ("Some Company" -> "SomeCompany"), so re-insert one
        // space between each pair of tokens.
        let val = tokens.collect::<Vec<_>>().join(" ");

        Ok(MetaData::KeyVal { key, val })
    }
}
impl FromStr for DataFormat {
    type Err = BoxErr;

    /// Build a `DataFormat` from a line of whitespace-separated field names.
    ///
    /// Each token is parsed into one field, in order. An empty or
    /// whitespace-only line produces a format with no fields.
    ///
    /// # Errors
    ///
    /// Returns the error of the first token that fails to parse.
    fn from_str(s: &str) -> Result<Self> {
        // `split_whitespace` already ignores leading and trailing whitespace.
        let fields = s
            .split_whitespace()
            .map(str::parse)
            .collect::<std::result::Result<Vec<_>, _>>()?;

        Ok(DataFormat { fields })
    }
}
/// Returns `true` for every character except an ASCII digit, `.` or `-`.
///
/// Used as a pattern to detect tokens that cannot be plain decimal numbers.
fn alpha(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
/// `alpha` accepts letters and rejects the characters that may appear in a
/// plain decimal number: ASCII digits, the decimal point and the minus sign.
#[test]
fn test_has_alpha() {
    for c in ['a', 'e', 'E', 'Z'] {
        assert!(alpha(c), "{c:?} should be alpha");
    }

    // Digits, plus the two punctuation characters the predicate excludes.
    for c in ['1', '2', '0', '9', '.', '-'] {
        assert!(!alpha(c), "{c:?} should not be alpha");
    }
}
impl FromStr for DataPoint {
    type Err = BoxErr;

    /// Classify a single token as an integer, a float, or text.
    ///
    /// The token is trimmed first. If it contains any character accepted by
    /// `alpha`, it is text. Otherwise an integer parse is attempted, then a
    /// float parse, and if both fail the token is kept as text.
    ///
    /// This never returns an error; the `Result` is required by `FromStr`.
    fn from_str(s: &str) -> Result<Self> {
        let token = s.trim();

        // Only tokens made of digits, `.` and `-` are candidates for numbers.
        if !token.contains(alpha) {
            if let Ok(int) = token.parse() {
                return Ok(Int(int));
            }
            if let Ok(float) = token.parse() {
                return Ok(Float(float));
            }
        }

        Ok(Alpha(token.into()))
    }
}
/// Parsing picks the expected `DataPoint` variant for integers, floats and
/// text, including text that looks like scientific notation.
#[test]
fn data_point_from_str() -> Result<()> {
    let int: DataPoint = "42".parse()?;
    if !matches!(int, Int(i) if i == 42) {
        panic!();
    }

    let float: DataPoint = "42.0".parse()?;
    if !matches!(float, Float(f) if f == 42.0) {
        panic!();
    }

    let text: DataPoint = "1A1".parse()?;
    if !matches!(text, Alpha(a) if a == "1A1") {
        panic!();
    }

    // "1E3" contains a letter, so it must stay text rather than become 1000.
    match "1E3".parse()? {
        Alpha(a) if a == "1E3" => (),
        Alpha(_) => panic!(),
        x => panic!("type is {x:?} but expected Alpha(\"1E3\")"),
    }

    Ok(())
}
/// Parse a sample file from disk and spot-check one row and one column.
#[test]
fn parse_file() {
    let raw = std::fs::read_to_string("test_files/cgats1.tsv").unwrap();
    let cgats: Cgats = raw.parse().unwrap();

    dbg!(&cgats, cgats.n_cols(), cgats.n_rows(), cgats.len());
    dbg!(&cgats.data_format);

    let magenta = Alpha("Magenta".into());
    let mut second_row = cgats.get_row(1).unwrap();
    assert_eq!(second_row.nth(1), Some(&magenta));

    let black = Alpha("Black".into());
    let mut second_col = cgats.get_col(1);
    assert_eq!(second_col.nth(3), Some(&black));
}
/// A file needs both a DATA_FORMAT section and a DATA section to parse;
/// leaving either one out must be reported as an error.
#[test]
fn parse_err() {
    let both_sections = "CGATS.17
BEGIN_DATA_FORMAT
END_DATA_FORMAT
BEGIN_DATA
END_DATA";

    let format_only = "CGATS.17
BEGIN_DATA_FORMAT
END_DATA_FORMAT";

    let data_only = "CGATS.17
BEGIN_DATA
END_DATA";

    let cases = [
        (both_sections, true),
        (format_only, false),
        (data_only, false),
    ];

    for (input, should_parse) in cases {
        let parsed: Result<Cgats> = input.parse();
        assert_eq!(parsed.is_ok(), should_parse, "input: {input:?}");
    }
}