Browse Source

cell regex

marion 3 years ago
parent
commit
6e25141919
4 changed files with 110 additions and 163 deletions
  1. 44 157
      Cargo.lock
  2. 2 4
      Cargo.toml
  3. 60 0
      src/cell_regex_test/mod.rs
  4. 4 2
      src/main.rs

+ 44 - 157
Cargo.lock

@@ -1,207 +1,94 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-[[package]]
-name = "base64"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff"
-
-[[package]]
-name = "block-buffer"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
-dependencies = [
- "generic-array",
-]
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "cpuid-bool"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634"
+version = 3
 
 [[package]]
-name = "crypto-mac"
-version = "0.9.1"
+name = "aho-corasick"
+version = "0.7.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58bcd97a54c7ca5ce2f6eb16f6bede5b0ab5f0055fedc17d2f0b4466e21671ca"
+checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
 dependencies = [
- "generic-array",
- "subtle",
-]
-
-[[package]]
-name = "digest"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
-dependencies = [
- "generic-array",
-]
-
-[[package]]
-name = "generic-array"
-version = "0.14.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817"
-dependencies = [
- "typenum",
- "version_check",
+ "memchr",
 ]
 
 [[package]]
 name = "hello-rust"
 version = "0.1.0"
 dependencies = [
- "hmac",
- "jwt",
- "serde",
- "sha2",
-]
-
-[[package]]
-name = "hmac"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "deae6d9dbb35ec2c502d62b8f7b1c000a0822c3b0794ba36b3149c0a1c840dff"
-dependencies = [
- "crypto-mac",
- "digest",
+ "regex",
+ "walkdir",
 ]
 
 [[package]]
-name = "itoa"
-version = "0.4.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
-
-[[package]]
-name = "jwt"
-version = "0.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0edf66902f26e7304b52abf9091f33227b55b041007fe98b7d12e79d78f7caaa"
-dependencies = [
- "base64",
- "crypto-mac",
- "digest",
- "hmac",
- "serde",
- "serde_json",
- "sha2",
-]
-
-[[package]]
-name = "opaque-debug"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.24"
+name = "memchr"
+version = "2.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
-dependencies = [
- "unicode-xid",
-]
+checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
 
 [[package]]
-name = "quote"
-version = "1.0.8"
+name = "regex"
+version = "1.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df"
+checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
 dependencies = [
- "proc-macro2",
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
 ]
 
 [[package]]
-name = "ryu"
-version = "1.0.5"
+name = "regex-syntax"
+version = "0.6.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
-
-[[package]]
-name = "serde"
-version = "1.0.123"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
-dependencies = [
- "serde_derive",
-]
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
 
 [[package]]
-name = "serde_derive"
-version = "1.0.123"
+name = "same-file"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
 dependencies = [
- "proc-macro2",
- "quote",
- "syn",
+ "winapi-util",
 ]
 
 [[package]]
-name = "serde_json"
-version = "1.0.61"
+name = "walkdir"
+version = "2.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a"
+checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
 dependencies = [
- "itoa",
- "ryu",
- "serde",
+ "same-file",
+ "winapi",
+ "winapi-util",
 ]
 
 [[package]]
-name = "sha2"
-version = "0.9.3"
+name = "winapi"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa827a14b29ab7f44778d14a88d3cb76e949c45083f7dbfa507d0cb699dc12de"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
 dependencies = [
- "block-buffer",
- "cfg-if",
- "cpuid-bool",
- "digest",
- "opaque-debug",
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
 ]
 
 [[package]]
-name = "subtle"
-version = "2.4.0"
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e81da0851ada1f3e9d4312c704aa4f8806f0f9d69faaf8df2f3464b4a9437c2"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
 [[package]]
-name = "syn"
-version = "1.0.60"
+name = "winapi-util"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
 dependencies = [
- "proc-macro2",
- "quote",
- "unicode-xid",
+ "winapi",
 ]
 
 [[package]]
-name = "typenum"
-version = "1.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33"
-
-[[package]]
-name = "unicode-xid"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
-
-[[package]]
-name = "version_check"
-version = "0.9.2"
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

+ 2 - 4
Cargo.toml

@@ -7,7 +7,5 @@ edition = "2018"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-jwt = "0.12.0"
-hmac = "0.9.0"
-sha2 = "0.9.2"
-serde = {version = "1.0", features = ["derive"] }
+regex = "1.5"
+walkdir = "2"

+ 60 - 0
src/cell_regex_test/mod.rs

@@ -0,0 +1,60 @@
+// [dependencies]
+// regex = "1.5"
+// walkdir = "2"
+
+use std::fs::File;
+use std::io;
+use std::io::{BufRead, Write};
+use std::path::Path;
+use walkdir::{DirEntry, WalkDir};
+use regex::Regex;
+
+/// Walks the input directory, scans every file whose name ends in `.sql` for
+/// mobile phone numbers, and appends each number found to the output file
+/// (one per line, followed by a tab).
+///
+/// A number is an 11-digit run consisting of one of the known 3-digit
+/// prefixes followed by 8 more digits. Digit runs are validated as a whole,
+/// so a number is found no matter what surrounds it (start or end of line,
+/// a single separator between two numbers), while longer digit runs that
+/// merely contain a number are rejected.
+///
+/// # Panics
+/// Panics if the output file cannot be created or written to.
+pub fn main() {
+    // Maximal runs of consecutive digits; every run is a candidate number.
+    let digit_run = Regex::new(r"\d+").unwrap();
+    // Mobile phone number pattern, anchored so the whole run must match.
+    // The character classes deliberately contain no commas: inside `[...]`
+    // a comma is a literal character, not a separator.
+    let pattern = Regex::new(r"^(?:13[0-9]|14[5-9]|15[0-35-9]|16[2567]|17[0-8]|18[0-9]|19[0-35-9])\d{8}$").unwrap();
+    // Directory to walk.
+    let dir = "input_dir_path";
+    let ext = ".sql";
+    // Output file for the results; buffered so each hit is not its own syscall.
+    let outfile = File::create("output_file_path").expect("create failed");
+    let mut outfile = io::BufWriter::new(outfile);
+
+    for entry in WalkDir::new(dir).into_iter().filter_entry(|e| !is_hidden(e)) {
+        // An unreadable entry (e.g. permission denied) is reported and skipped
+        // instead of aborting the whole scan.
+        let entry = match entry {
+            Ok(entry) => entry,
+            Err(err) => {
+                eprintln!("跳过无法访问的条目: {}", err);
+                continue;
+            }
+        };
+        // Directories can also be named `*.sql`; reading one would fail on
+        // every line request, so they are skipped up front.
+        if entry.file_type().is_dir() || !entry.file_name().to_string_lossy().ends_with(ext) {
+            continue;
+        }
+        println!("读取文件 {}", entry.path().display());
+
+        let mut count = 0;
+        match read_lines(entry.path()) {
+            Ok(lines) => {
+                for line in lines {
+                    let text = match line {
+                        Ok(text) => text,
+                        // A line that is not valid UTF-8: skip it and keep reading.
+                        Err(err) if err.kind() == io::ErrorKind::InvalidData => continue,
+                        // Any other I/O error would repeat on the next read; stop this file.
+                        Err(err) => {
+                            eprintln!("读取文件出错 {}: {}", entry.path().display(), err);
+                            break;
+                        }
+                    };
+                    for run in digit_run.find_iter(&text) {
+                        if !pattern.is_match(run.as_str()) {
+                            continue;
+                        }
+
+                        let context = format!("{}\t\n", run.as_str());
+                        outfile.write_all(context.as_bytes()).expect("write failed");
+                        count += 1;
+                    }
+                }
+            }
+            Err(err) => eprintln!("无法打开文件 {}: {}", entry.path().display(), err),
+        }
+        println!("获取到 {} 条有效信息", count);
+    }
+
+    // Flush explicitly: an error during the implicit flush on drop would be lost.
+    outfile.flush().expect("write failed");
+}
+
+/// Returns `true` when `entry` is a hidden file or directory, i.e. its file
+/// name starts with a dot.
+///
+/// The walk root (depth 0) is never reported as hidden: a root such as `.`
+/// or `.cache` would otherwise be pruned by `filter_entry`, and the walk
+/// would yield nothing at all. Names that are not valid UTF-8 are treated as
+/// not hidden.
+fn is_hidden(entry: &DirEntry) -> bool {
+    entry.depth() > 0
+        && entry
+            .file_name()
+            .to_str()
+            .map(|name| name.starts_with('.'))
+            .unwrap_or(false)
+}
+
+/// Opens `filename` and returns an iterator over its lines, read through a
+/// buffered reader.
+///
+/// # Errors
+/// Returns the error from `File::open` if the file cannot be opened. Errors
+/// hit while reading are yielded by the returned iterator, one per line.
+fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
+where
+    P: AsRef<Path>,
+{
+    File::open(filename).map(|handle| io::BufReader::new(handle).lines())
+}

+ 4 - 2
src/main.rs

@@ -8,7 +8,8 @@
 // mod spider_test;
 // mod actix_test;
 // mod guessing_game;
-mod jwt_test;
+// mod jwt_test;
+mod cell_regex_test;
 
 // 将其他模块的成员声明为当前模块直接可用的,等同于java的静态引用
 // 若省略这句,使用时就需要加上模块名,如random_test::Random
@@ -20,6 +21,7 @@ fn main()  {
     // qr_code_test::test(2, 3);
     // spider_test::test("https://www.baidu.com").unwrap();
     // actix_test::http_listener() // return -> std::io::Result<()>
-    jwt_test::main();
+    // jwt_test::main();
+    cell_regex_test::main();
 }